From 69a029d5abfcc70beca7ebe28c2276f5fef0a06c Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Sun, 22 Feb 2026 23:26:14 -0500
Subject: [PATCH] added IO type researcher code and a report

---
 io-type-report.md                  | 439 ++++++++++++++++++++++
 scripts/generate_io_type_report.py | 579 +++++++++++++++++++++++++++++
 2 files changed, 1018 insertions(+)
 create mode 100644 io-type-report.md
 create mode 100644 scripts/generate_io_type_report.py

diff --git a/io-type-report.md b/io-type-report.md
new file mode 100644
index 0000000..268d78a
--- /dev/null
+++ b/io-type-report.md
@@ -0,0 +1,439 @@
+# CLAMS App I/O Types Report
+
+*Auto‑generated by `scripts/io_type_report.py` on 2026-02-22*
+
+This report lists the input/output annotation types declared by the latest version of every published CLAMS app and groups apps that share the same types.
+
+## Summary
+
+| Metric | Count |
+|--------|------:|
+| Published apps (latest versions) | 30 |
+| Distinct output types | 12 |
+| Distinct input types | 10 |
+
+## Apps Grouped by Output Type Pattern
+
+Grouped by the combination of output annotation types, inferred from declared I/O metadata.
+
+### TextDocument + TimeFrame (+ Alignment) (6)
+
+| App | Version | Output Types |
+|-----|---------|--------------|
+| AAPB-PUA Kaldi Wrapper | v2 | TextDocument, TimeFrame, Alignment, Token |
+| CLAMS NFA Wrapper | v0.1 | Token, TimeFrame, Alignment |
+| Distil Whisper Wrapper | v1.2 | TextDocument, TimeFrame, Alignment, Sentence |
+| Gentle Forced Aligner Wrapper | v1.0 | Token, TimeFrame, Alignment |
+| Parakeet Wrapper | v1.0 | TextDocument, TimeFrame, Alignment, Token, Sentence |
+| Whisper Wrapper | v15 | TextDocument, TimeFrame, Alignment, Token, Sentence |
+
+### TextDocument + BoundingBox (+ Alignment) (3)
+
+| App | Version | Output Types |
+|-----|---------|--------------|
+| CLAMS docTR Wrapper | v1.4 | TextDocument, Token, Sentence, Paragraph, Alignment, BoundingBox |
+| Easyocr Wrapper | v1.1 | TextDocument, Alignment, BoundingBox, TimePoint |
+| TesseractOCR Wrapper | v2.1 | TextDocument, Token, Sentence, Paragraph, Alignment, BoundingBox |
+
+### TextDocument from visual input (no BoundingBox) (2)
+
+| App | Version | Output Types |
+|-----|---------|--------------|
+| LLaVA Captioner | v1.3 | Alignment, TextDocument |
+| SmolVLM2 Captioner | v0.3 | Alignment, TextDocument |
+
+### TimeFrame (11)
+
+| App | Version | Output Types |
+|-----|---------|--------------|
+| Bars Detection | v1.1 | TimeFrame |
+| Brandeis ACS Wrapper | v2 | TimeFrame |
+| Chyron Detection | v1.0 | TimeFrame |
+| Few Shot Classifier | v1.0 | TimeFrame |
+| inaSpeechSegmenter Wrapper | v2.1 | TimeFrame |
+| Pyscenedetect Wrapper | v4 | TimeFrame |
+| Scenes-with-text Detection | v8.4 | TimeFrame, TimePoint |
+| Simple Timepoints Stitcher | v3.0 | TimeFrame |
+| Slate Detection | v2.1 | TimeFrame |
+| Spoken Language Identification | v0.3 | TimeFrame |
+| Tonedetection | v2.0 | TimeFrame |
+
+### NamedEntity (2)
+
+| App | Version | Output Types |
+|-----|---------|--------------|
+| CLAMS wrapper for spaCy NLP | v2.1 | Token, Token#pos, Token#lemma, NounChunk, Sentence, NamedEntity |
+| Dbpedia Spotlight Wrapper | v1.2 | NamedEntity |
+
+### BoundingBox (no TextDocument) (1)
+
+| App | Version | Output Types |
+|-----|---------|--------------|
+| EAST Text Detection | v1.2 | BoundingBox |
+
+### Unclassified (5)
+
+| App | Version | Output Types |
+|-----|---------|--------------|
+| Heuristic Chyron Understanding | v0.2 | TextDocument |
+| Parseq OCR Wrapper | v1.0 | TextDocument, Alignment |
+| Tesseract OCR Wrapper | v1.0 | TextDocument, Alignment |
+| Text Slicer | v1.0 | TextDocument, Alignment |
+| Tfidf Keywordextractor | v1.0 | TextDocument, Alignment |
+
+## Apps Grouped by Shared Output Type
+
+### `TimeFrame` (MMIF) — 17 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| AAPB-PUA Kaldi Wrapper | v2 | AudioDocument | VideoDocument |
+| Bars Detection | v1.1 | VideoDocument |
+| Brandeis ACS Wrapper | v2 | AudioDocument |
+| Chyron Detection | v1.0 | VideoDocument |
+| CLAMS NFA Wrapper | v0.1 | AudioDocument | VideoDocument, TextDocument |
+| Distil Whisper Wrapper | v1.2 | AudioDocument | VideoDocument |
+| Few Shot Classifier | v1.0 | VideoDocument |
+| Gentle Forced Aligner Wrapper | v1.0 | TextDocument, AudioDocument, TimeFrame, Token |
+| inaSpeechSegmenter Wrapper | v2.1 | AudioDocument | VideoDocument |
+| Parakeet Wrapper | v1.0 | AudioDocument | VideoDocument |
+| Pyscenedetect Wrapper | v4 | VideoDocument |
+| Scenes-with-text Detection | v8.4 | VideoDocument |
+| Simple Timepoints Stitcher | v3.0 | AudioDocument | VideoDocument, TimePoint |
+| Slate Detection | v2.1 | VideoDocument |
+| Spoken Language Identification | v0.3 | AudioDocument | VideoDocument |
+| Tonedetection | v2.0 | AudioDocument | VideoDocument |
+| Whisper Wrapper | v15 | AudioDocument | VideoDocument |
+
+### `Alignment` (MMIF) — 15 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| AAPB-PUA Kaldi Wrapper | v2 | AudioDocument | VideoDocument |
+| CLAMS docTR Wrapper | v1.4 | VideoDocument, TimeFrame |
+| CLAMS NFA Wrapper | v0.1 | AudioDocument | VideoDocument, TextDocument |
+| Distil Whisper Wrapper | v1.2 | AudioDocument | VideoDocument |
+| Easyocr Wrapper | v1.1 | VideoDocument, TimeFrame |
+| Gentle Forced Aligner Wrapper | v1.0 | TextDocument, AudioDocument, TimeFrame, Token |
+| LLaVA Captioner | v1.3 | VideoDocument, ImageDocument, TimeFrame |
+| Parakeet Wrapper | v1.0 | AudioDocument | VideoDocument |
+| Parseq OCR Wrapper | v1.0 | VideoDocument, BoundingBox |
+| SmolVLM2 Captioner | v0.3 | VideoDocument, ImageDocument, TimeFrame |
+| Tesseract OCR Wrapper | v1.0 | VideoDocument, BoundingBox, TimeFrame |
+| TesseractOCR Wrapper | v2.1 | VideoDocument, TimeFrame |
+| Text Slicer | v1.0 | TimeFrame, TextDocument |
+| Tfidf Keywordextractor | v1.0 | TextDocument |
+| Whisper Wrapper | v15 | AudioDocument | VideoDocument |
+
+### `TextDocument` (MMIF) — 14 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| AAPB-PUA Kaldi Wrapper | v2 | AudioDocument | VideoDocument |
+| CLAMS docTR Wrapper | v1.4 | VideoDocument, TimeFrame |
+| Distil Whisper Wrapper | v1.2 | AudioDocument | VideoDocument |
+| Easyocr Wrapper | v1.1 | VideoDocument, TimeFrame |
+| Heuristic Chyron Understanding | v0.2 | TextDocument |
+| LLaVA Captioner | v1.3 | VideoDocument, ImageDocument, TimeFrame |
+| Parakeet Wrapper | v1.0 | AudioDocument | VideoDocument |
+| Parseq OCR Wrapper | v1.0 | VideoDocument, BoundingBox |
+| SmolVLM2 Captioner | v0.3 | VideoDocument, ImageDocument, TimeFrame |
+| Tesseract OCR Wrapper | v1.0 | VideoDocument, BoundingBox, TimeFrame |
+| TesseractOCR Wrapper | v2.1 | VideoDocument, TimeFrame |
+| Text Slicer | v1.0 | TimeFrame, TextDocument |
+| Tfidf Keywordextractor | v1.0 | TextDocument |
+| Whisper Wrapper | v15 | AudioDocument | VideoDocument |
+
+### `Token` (LAPPS) — 8 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| AAPB-PUA Kaldi Wrapper | v2 | AudioDocument | VideoDocument |
+| CLAMS docTR Wrapper | v1.4 | VideoDocument, TimeFrame |
+| CLAMS NFA Wrapper | v0.1 | AudioDocument | VideoDocument, TextDocument |
+| CLAMS wrapper for spaCy NLP | v2.1 | TextDocument, Token |
+| Gentle Forced Aligner Wrapper | v1.0 | TextDocument, AudioDocument, TimeFrame, Token |
+| Parakeet Wrapper | v1.0 | AudioDocument | VideoDocument |
+| TesseractOCR Wrapper | v2.1 | VideoDocument, TimeFrame |
+| Whisper Wrapper | v15 | AudioDocument | VideoDocument |
+
+### `Sentence` (LAPPS) — 6 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| CLAMS docTR Wrapper | v1.4 | VideoDocument, TimeFrame |
+| CLAMS wrapper for spaCy NLP | v2.1 | TextDocument, Token |
+| Distil Whisper Wrapper | v1.2 | AudioDocument | VideoDocument |
+| Parakeet Wrapper | v1.0 | AudioDocument | VideoDocument |
+| TesseractOCR Wrapper | v2.1 | VideoDocument, TimeFrame |
+| Whisper Wrapper | v15 | AudioDocument | VideoDocument |
+
+### `BoundingBox` (MMIF) — 4 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| CLAMS docTR Wrapper | v1.4 | VideoDocument, TimeFrame |
+| EAST Text Detection | v1.2 | VideoDocument | ImageDocument, TimeFrame |
+| Easyocr Wrapper | v1.1 | VideoDocument, TimeFrame |
+| TesseractOCR Wrapper | v2.1 | VideoDocument, TimeFrame |
+
+### `NamedEntity` (LAPPS) — 2 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| CLAMS wrapper for spaCy NLP | v2.1 | TextDocument, Token |
+| Dbpedia Spotlight Wrapper | v1.2 | TextDocument |
+
+### `Paragraph` (LAPPS) — 2 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| CLAMS docTR Wrapper | v1.4 | VideoDocument, TimeFrame |
+| TesseractOCR Wrapper | v2.1 | VideoDocument, TimeFrame |
+
+### `TimePoint` (MMIF) — 2 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| Easyocr Wrapper | v1.1 | VideoDocument, TimeFrame |
+| Scenes-with-text Detection | v8.4 | VideoDocument |
+
+### `NounChunk` (LAPPS) — 1 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| CLAMS wrapper for spaCy NLP | v2.1 | TextDocument, Token |
+
+### `Token#lemma` (LAPPS) — 1 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| CLAMS wrapper for spaCy NLP | v2.1 | TextDocument, Token |
+
+### `Token#pos` (LAPPS) — 1 app(s)
+
+| App | Version | Inputs |
+|-----|---------|--------|
+| CLAMS wrapper for spaCy NLP | v2.1 | TextDocument, Token |
+
+## Apps Grouped by Shared Input Type
+
+### `VideoDocument` (MMIF) — 13 app(s)
+
+| App | Version | Outputs |
+|-----|---------|---------|
+| Bars Detection | v1.1 | TimeFrame |
+| Chyron Detection | v1.0 | TimeFrame |
+| CLAMS docTR Wrapper | v1.4 | TextDocument, Token, Sentence, Paragraph, Alignment, BoundingBox |
+| Easyocr Wrapper | v1.1 | TextDocument, Alignment, BoundingBox, TimePoint |
+| Few Shot Classifier | v1.0 | TimeFrame |
+| LLaVA Captioner | v1.3 | Alignment, TextDocument |
+| Parseq OCR Wrapper | v1.0 | TextDocument, Alignment |
+| Pyscenedetect Wrapper | v4 | TimeFrame |
+| Scenes-with-text Detection | v8.4 | TimeFrame, TimePoint |
+| Slate Detection | v2.1 | TimeFrame |
+| SmolVLM2 Captioner | v0.3 | Alignment, TextDocument |
+| Tesseract OCR Wrapper | v1.0 | TextDocument, Alignment |
+| TesseractOCR Wrapper | v2.1 | TextDocument, Token, Sentence, Paragraph, Alignment, BoundingBox |
+
+### `AudioDocument | VideoDocument` (MMIF) — 9 app(s)
+
+| App | Version | Outputs |
+|-----|---------|---------|
+| AAPB-PUA Kaldi Wrapper | v2 | TextDocument, TimeFrame, Alignment, Token |
+| CLAMS NFA Wrapper | v0.1 | Token, TimeFrame, Alignment |
+| Distil Whisper Wrapper | v1.2 | TextDocument, TimeFrame, Alignment, Sentence |
+| inaSpeechSegmenter Wrapper | v2.1 | TimeFrame |
+| Parakeet Wrapper | v1.0 | TextDocument, TimeFrame, Alignment, Token, Sentence |
+| Simple Timepoints Stitcher | v3.0 | TimeFrame |
+| Spoken Language Identification | v0.3 | TimeFrame |
+| Tonedetection | v2.0 | TimeFrame |
+| Whisper Wrapper | v15 | TextDocument, TimeFrame, Alignment, Token, Sentence |
+
+### `TimeFrame` (MMIF) — 9 app(s)
+
+| App | Version | Outputs |
+|-----|---------|---------|
+| CLAMS docTR Wrapper | v1.4 | TextDocument, Token, Sentence, Paragraph, Alignment, BoundingBox |
+| EAST Text Detection | v1.2 | BoundingBox |
+| Easyocr Wrapper | v1.1 | TextDocument, Alignment, BoundingBox, TimePoint |
+| Gentle Forced Aligner Wrapper | v1.0 | Token, TimeFrame, Alignment |
+| LLaVA Captioner | v1.3 | Alignment, TextDocument |
+| SmolVLM2 Captioner | v0.3 | Alignment, TextDocument |
+| Tesseract OCR Wrapper | v1.0 | TextDocument, Alignment |
+| TesseractOCR Wrapper | v2.1 | TextDocument, Token, Sentence, Paragraph, Alignment, BoundingBox |
+| Text Slicer | v1.0 | TextDocument, Alignment |
+
+### `TextDocument` (MMIF) — 7 app(s)
+
+| App | Version | Outputs |
+|-----|---------|---------|
+| CLAMS NFA Wrapper | v0.1 | Token, TimeFrame, Alignment |
+| CLAMS wrapper for spaCy NLP | v2.1 | Token, Token#pos, Token#lemma, NounChunk, Sentence, NamedEntity |
+| Dbpedia Spotlight Wrapper | v1.2 | NamedEntity |
+| Gentle Forced Aligner Wrapper | v1.0 | Token, TimeFrame, Alignment |
+| Heuristic Chyron Understanding | v0.2 | TextDocument |
+| Text Slicer | v1.0 | TextDocument, Alignment |
+| Tfidf Keywordextractor | v1.0 | TextDocument, Alignment |
+
+### `AudioDocument` (MMIF) — 2 app(s)
+
+| App | Version | Outputs |
+|-----|---------|---------|
+| Brandeis ACS Wrapper | v2 | TimeFrame |
+| Gentle Forced Aligner Wrapper | v1.0 | Token, TimeFrame, Alignment |
+
+### `BoundingBox` (MMIF) — 2 app(s)
+
+| App | Version | Outputs |
+|-----|---------|---------|
+| Parseq OCR Wrapper | v1.0 | TextDocument, Alignment |
+| Tesseract OCR Wrapper | v1.0 | TextDocument, Alignment |
+
+### `ImageDocument` (MMIF) — 2 app(s)
+
+| App | Version | Outputs |
+|-----|---------|---------|
+| LLaVA Captioner | v1.3 | Alignment, TextDocument |
+| SmolVLM2 Captioner | v0.3 | Alignment, TextDocument |
+
+### `Token` (LAPPS) — 2 app(s)
+
+| App | Version | Outputs |
+|-----|---------|---------|
+| CLAMS wrapper for spaCy NLP | v2.1 | Token, Token#pos, Token#lemma, NounChunk, Sentence, NamedEntity |
+| Gentle Forced Aligner Wrapper | v1.0 | Token, TimeFrame, Alignment |
+
+### `TimePoint` (MMIF) — 1 app(s)
+
+| App | Version | Outputs |
+|-----|---------|---------|
+| Simple Timepoints Stitcher | v3.0 | TimeFrame |
+
+### `VideoDocument | ImageDocument` (MMIF) — 1 app(s)
+
+| App | Version | Outputs |
+|-----|---------|---------|
+| EAST Text Detection | v1.2 | BoundingBox |
+
+## Full App I/O Reference
+
+| App | Version | Output Pattern | Input Types | Output Types |
+|-----|---------|----------------|-------------|--------------|
+| AAPB-PUA Kaldi Wrapper | v2 | TextDocument + TimeFrame (+ Alignment) | AudioDocument | VideoDocument | TextDocument, TimeFrame, Alignment, Token |
+| Bars Detection | v1.1 | TimeFrame | VideoDocument | TimeFrame |
+| Brandeis ACS Wrapper | v2 | TimeFrame | AudioDocument | TimeFrame |
+| Chyron Detection | v1.0 | TimeFrame | VideoDocument | TimeFrame |
+| CLAMS docTR Wrapper | v1.4 | TextDocument + BoundingBox (+ Alignment) | VideoDocument, TimeFrame | TextDocument, Token, Sentence, Paragraph, Alignment, BoundingBox |
+| CLAMS NFA Wrapper | v0.1 | TextDocument + TimeFrame (+ Alignment) | AudioDocument | VideoDocument, TextDocument | Token, TimeFrame, Alignment |
+| CLAMS wrapper for spaCy NLP | v2.1 | NamedEntity | TextDocument, Token | Token, Token#pos, Token#lemma, NounChunk, Sentence, NamedEntity |
+| Dbpedia Spotlight Wrapper | v1.2 | NamedEntity | TextDocument | NamedEntity |
+| Distil Whisper Wrapper | v1.2 | TextDocument + TimeFrame (+ Alignment) | AudioDocument | VideoDocument | TextDocument, TimeFrame, Alignment, Sentence |
+| EAST Text Detection | v1.2 | BoundingBox (no TextDocument) | VideoDocument | ImageDocument, TimeFrame | BoundingBox |
+| Easyocr Wrapper | v1.1 | TextDocument + BoundingBox (+ Alignment) | VideoDocument, TimeFrame | TextDocument, Alignment, BoundingBox, TimePoint |
+| Few Shot Classifier | v1.0 | TimeFrame | VideoDocument | TimeFrame |
+| Gentle Forced Aligner Wrapper | v1.0 | TextDocument + TimeFrame (+ Alignment) | TextDocument, AudioDocument, TimeFrame, Token | Token, TimeFrame, Alignment |
+| Heuristic Chyron Understanding | v0.2 | — | TextDocument | TextDocument |
+| inaSpeechSegmenter Wrapper | v2.1 | TimeFrame | AudioDocument | VideoDocument | TimeFrame |
+| LLaVA Captioner | v1.3 | TextDocument from visual input (no BoundingBox) | VideoDocument, ImageDocument, TimeFrame | Alignment, TextDocument |
+| Parakeet Wrapper | v1.0 | TextDocument + TimeFrame (+ Alignment) | AudioDocument | VideoDocument | TextDocument, TimeFrame, Alignment, Token, Sentence |
+| Parseq OCR Wrapper | v1.0 | — | VideoDocument, BoundingBox | TextDocument, Alignment |
+| Pyscenedetect Wrapper | v4 | TimeFrame | VideoDocument | TimeFrame |
+| Scenes-with-text Detection | v8.4 | TimeFrame | VideoDocument | TimeFrame, TimePoint |
+| Simple Timepoints Stitcher | v3.0 | TimeFrame | AudioDocument | VideoDocument, TimePoint | TimeFrame |
+| Slate Detection | v2.1 | TimeFrame | VideoDocument | TimeFrame |
+| SmolVLM2 Captioner | v0.3 | TextDocument from visual input (no BoundingBox) | VideoDocument, ImageDocument, TimeFrame | Alignment, TextDocument |
+| Spoken Language Identification | v0.3 | TimeFrame | AudioDocument | VideoDocument | TimeFrame |
+| Tesseract OCR Wrapper | v1.0 | — | VideoDocument, BoundingBox, TimeFrame | TextDocument, Alignment |
+| TesseractOCR Wrapper | v2.1 | TextDocument + BoundingBox (+ Alignment) | VideoDocument, TimeFrame | TextDocument, Token, Sentence, Paragraph, Alignment, BoundingBox |
+| Text Slicer | v1.0 | — | TimeFrame, TextDocument | TextDocument, Alignment |
+| Tfidf Keywordextractor | v1.0 | — | TextDocument | TextDocument, Alignment |
+| Tonedetection | v2.0 | TimeFrame | AudioDocument | VideoDocument | TimeFrame |
+| Whisper Wrapper | v15 | TextDocument + TimeFrame (+ Alignment) | AudioDocument | VideoDocument | TextDocument, TimeFrame, Alignment, Token, Sentence |
+
+## Anchor Type and Alignment Analysis
+
+Anchor types establish coordinate systems that other annotations reference. Temporal anchors: `TimeFrame`, `TimePoint`. Image anchors: `BoundingBox`. Textual anchors: LAPPS `Span` subtypes (`Token`, `Sentence`, `Paragraph`, `NamedEntity`, …).
+
+### Anchor Types Produced per App
+
+| App | Temporal | Image | Textual | Alignment |
+|-----|----------|-------|---------|-----------|
+| AAPB-PUA Kaldi Wrapper | TimeFrame | — | Token | yes |
+| Bars Detection | TimeFrame | — | — | — |
+| Brandeis ACS Wrapper | TimeFrame | — | — | — |
+| Chyron Detection | TimeFrame | — | — | — |
+| CLAMS docTR Wrapper | — | BoundingBox | Paragraph, Sentence, Token | yes |
+| CLAMS NFA Wrapper | TimeFrame | — | Token | yes |
+| CLAMS wrapper for spaCy NLP | — | — | NamedEntity, NounChunk, Sentence, Token | — |
+| Dbpedia Spotlight Wrapper | — | — | NamedEntity | — |
+| Distil Whisper Wrapper | TimeFrame | — | Sentence | yes |
+| EAST Text Detection | — | BoundingBox | — | — |
+| Easyocr Wrapper | TimePoint | BoundingBox | — | yes |
+| Few Shot Classifier | TimeFrame | — | — | — |
+| Gentle Forced Aligner Wrapper | TimeFrame | — | Token | yes |
+| Heuristic Chyron Understanding | — | — | — | — |
+| inaSpeechSegmenter Wrapper | TimeFrame | — | — | — |
+| LLaVA Captioner | — | — | — | yes |
+| Parakeet Wrapper | TimeFrame | — | Sentence, Token | yes |
+| Parseq OCR Wrapper | — | — | — | yes |
+| Pyscenedetect Wrapper | TimeFrame | — | — | — |
+| Scenes-with-text Detection | TimeFrame, TimePoint | — | — | — |
+| Simple Timepoints Stitcher | TimeFrame | — | — | — |
+| Slate Detection | TimeFrame | — | — | — |
+| SmolVLM2 Captioner | — | — | — | yes |
+| Spoken Language Identification | TimeFrame | — | — | — |
+| Tesseract OCR Wrapper | — | — | — | yes |
+| TesseractOCR Wrapper | — | BoundingBox | Paragraph, Sentence, Token | yes |
+| Text Slicer | — | — | — | yes |
+| Tfidf Keywordextractor | — | — | — | yes |
+| Tonedetection | TimeFrame | — | — | — |
+| Whisper Wrapper | TimeFrame | — | Sentence, Token | yes |
+
+### Alignment Anchor Pairs
+
+Source/target anchor pairs extracted from the `description` metadata field (*described*) or inferred from co-occurring anchor types in the same output list (*inferred*). *Underdeclared*: `Alignment` is declared but anchor types are insufficient to determine pairing.
+
+| App | Left | Right | Classification | Source |
+|-----|------|-------|----------------|--------|
+| AAPB-PUA Kaldi Wrapper | `TextDocument` | `TimeFrame` | document↔temporal | inferred |
+| AAPB-PUA Kaldi Wrapper | `TextDocument` | `Token` | document↔textual | inferred |
+| AAPB-PUA Kaldi Wrapper | `TimeFrame` | `Token` | temporal↔textual | inferred |
+| CLAMS docTR Wrapper | `TimePoint` | `TextDocument` | document↔temporal | described |
+| CLAMS docTR Wrapper | `TimePoint` | `Token` | temporal↔textual | described |
+| CLAMS docTR Wrapper | `TimePoint` | `Sentence` | temporal↔textual | described |
+| CLAMS docTR Wrapper | `TimePoint` | `Paragraph` | temporal↔textual | described |
+| CLAMS docTR Wrapper | `BoundingBox` | `Token` | image↔textual | described |
+| CLAMS docTR Wrapper | `BoundingBox` | `Sentence` | image↔textual | described |
+| CLAMS docTR Wrapper | `BoundingBox` | `Paragraph` | image↔textual | described |
+| CLAMS NFA Wrapper | `Token` | `TimeFrame` | temporal↔textual | described |
+| Distil Whisper Wrapper | `TimeFrame` | `SENTENCE` | single-anchor (temporal) ⚠ | described |
+| Distil Whisper Wrapper | `audio/video document` | `TextDocument` | other | described |
+| Easyocr Wrapper | `TextDocument` | `BoundingBox` | document↔image | inferred |
+| Easyocr Wrapper | `TextDocument` | `TimePoint` | document↔temporal | inferred |
+| Easyocr Wrapper | `BoundingBox` | `TimePoint` | image↔temporal | inferred |
+| Gentle Forced Aligner Wrapper | `TimeFrame` | `Token` | temporal↔textual | inferred |
+| LLaVA Captioner | `TextDocument` | `?` | single-anchor (document) ⚠ | underdeclared |
+| Parakeet Wrapper | `TextDocument` | `TimeFrame` | document↔temporal | inferred |
+| Parakeet Wrapper | `TextDocument` | `Token` | document↔textual | inferred |
+| Parakeet Wrapper | `TextDocument` | `Sentence` | document↔textual | inferred |
+| Parakeet Wrapper | `TimeFrame` | `Token` | temporal↔textual | inferred |
+| Parakeet Wrapper | `TimeFrame` | `Sentence` | temporal↔textual | inferred |
+| Parseq OCR Wrapper | `TextDocument` | `?` | single-anchor (document) ⚠ | underdeclared |
+| SmolVLM2 Captioner | `TextDocument` | `?` | single-anchor (document) ⚠ | underdeclared |
+| Tesseract OCR Wrapper | `TextDocument` | `?` | single-anchor (document) ⚠ | underdeclared |
+| TesseractOCR Wrapper | `TimePoint` | `TextDocument` | document↔temporal | described |
+| TesseractOCR Wrapper | `TimePoint` | `Token` | temporal↔textual | described |
+| TesseractOCR Wrapper | `TimePoint` | `Sentence` | temporal↔textual | described |
+| TesseractOCR Wrapper | `TimePoint` | `Paragraph` | temporal↔textual | described |
+| TesseractOCR Wrapper | `BoundingBox` | `Token` | image↔textual | described |
+| TesseractOCR Wrapper | `BoundingBox` | `Sentence` | image↔textual | described |
+| TesseractOCR Wrapper | `BoundingBox` | `Paragraph` | image↔textual | described |
+| Text Slicer | `TextDocument` | `?` | single-anchor (document) ⚠ | underdeclared |
+| Tfidf Keywordextractor | `TextDocument` | `?` | single-anchor (document) ⚠ | underdeclared |
+| Whisper Wrapper | `TextDocument` | `TimeFrame` | document↔temporal | inferred |
+| Whisper Wrapper | `TextDocument` | `Token` | document↔textual | inferred |
+| Whisper Wrapper | `TextDocument` | `Sentence` | document↔textual | inferred |
+| Whisper Wrapper | `TimeFrame` | `Token` | temporal↔textual | inferred |
+| Whisper Wrapper | `TimeFrame` | `Sentence` | temporal↔textual | inferred |
+
diff --git a/scripts/generate_io_type_report.py b/scripts/generate_io_type_report.py
new file mode 100644
index 0000000..5c8c310
--- /dev/null
+++ b/scripts/generate_io_type_report.py
@@ -0,0 +1,579 @@
+"""Generate I/O type aggregation report for published CLAMS apps.
+
+Iterates all app version directories under ``docs/_apps/``, identifies
+the latest version of each app, and groups them by shared input/output
+annotation types.  The result is written to a markdown file.
+
+Usage::
+
+    python scripts/io_type_report.py > report.md
+"""
+
+import json
+import re
+from collections import defaultdict
+from datetime import datetime
+from pathlib import Path
+
+
+APPS_DIR = Path('docs/_apps')
+
+
+# ── anchor taxonomy ──────────────────────────────────────────────────
+
+TEMPORAL_ANCHORS = frozenset({'TimeFrame', 'TimePoint'})
+IMAGE_ANCHORS = frozenset({'BoundingBox'})
+TEXTUAL_ANCHORS = frozenset({           # LAPPS Span subtypes
+    'Token', 'Sentence', 'Paragraph', 'NounChunk',
+    'NamedEntity', 'Markable', 'Span',
+})
+DOCUMENT_TYPES = frozenset({
+    'TextDocument', 'AudioDocument', 'VideoDocument', 'ImageDocument',
+})
+AUDIO_CAPABLE = frozenset({'AudioDocument', 'VideoDocument'})
+
+OUTPUT_PATTERN_ORDER = [
+    'TextDocument + TimeFrame (+ Alignment)',
+    'TextDocument + BoundingBox (+ Alignment)',
+    'TextDocument from visual input (no BoundingBox)',
+    'TimeFrame',
+    'NamedEntity',
+    'BoundingBox (no TextDocument)',
+    'TimePoint',
+    'Unclassified',
+]
+
+
+# ── helpers ──────────────────────────────────────────────────────────
+
+def version_sort_key(version_str: str) -> tuple:
+    """Return a tuple suitable for sorting version strings like v1, v2.1, v15."""
+    v = version_str.lstrip('v')
+    parts = re.split(r'[.\-]', v)
+    result = []
+    for p in parts:
+        try:
+            result.append(int(p))
+        except ValueError:
+            result.append(0)
+    return tuple(result)
+
+
+def extract_type_uri(item) -> str | tuple:
+    """Extract the ``@type`` URI from an I/O spec entry.
+
+    An entry can be:
+      - a dict with ``@type``
+      - a plain string (older metadata format)
+      - a list of dicts (OR‑alternatives)
+    """
+    if isinstance(item, dict):
+        return item.get('@type', '')
+    if isinstance(item, str):
+        return item
+    if isinstance(item, list):
+        return tuple(extract_type_uri(i) for i in item)
+    return str(item)
+
+
+def base_type_name(type_uri: str | tuple) -> str:
+    """Extract a human‑readable *base* type name from a full URI.
+
+    Examples:
+      ``http://mmif.clams.ai/vocabulary/TimeFrame/v5``  -> ``TimeFrame``
+      ``http://vocab.lappsgrid.org/Token``               -> ``Token``
+      ``http://vocab.lappsgrid.org/Token#pos``           -> ``Token#pos``
+    """
+    if isinstance(type_uri, tuple):
+        return ' | '.join(base_type_name(t) for t in type_uri)
+
+    # property sub‑types  (Token#pos, Token#lemma)
+    if '#' in type_uri:
+        base_part, prop = type_uri.rsplit('#', 1)
+        return f'{base_type_name(base_part)}#{prop}'
+
+    parts = type_uri.rstrip('/').split('/')
+    # walk backwards; the first segment that looks like a version tag means
+    # the previous segment is the type name
+    for i in range(len(parts) - 1, 0, -1):
+        segment = parts[i]
+        if re.match(r'^v\d', segment):
+            return parts[i - 1]
+    # no version tag – last segment *is* the type name (LAPPS style)
+    return parts[-1] if parts else type_uri
+
+
+def vocab_source(type_uri: str | tuple) -> str:
+    """Return ``MMIF`` or ``LAPPS`` depending on the URI namespace."""
+    if isinstance(type_uri, tuple):
+        sources = sorted({vocab_source(t) for t in type_uri})
+        return '/'.join(sources)
+    if 'mmif.clams.ai' in type_uri:
+        return 'MMIF'
+    if 'lappsgrid.org' in type_uri:
+        return 'LAPPS'
+    return 'other'
+
+
+def short_app_name(identifier: str) -> str:
+    """``http://apps.clams.ai/whisper-wrapper/v15`` -> ``whisper-wrapper``"""
+    # strip version suffix, then take last path component
+    base = identifier.rsplit('/', 1)[0]
+    return base.rstrip('/').rsplit('/', 1)[-1]
+
+
+def _flatten_type_names(items: list) -> set[str]:
+    """Return the set of all base type names from an I/O list,
+    expanding OR-alternative sub-lists into individual names."""
+    names = set()
+    for item in items:
+        uri = extract_type_uri(item)
+        if isinstance(uri, tuple):
+            for u in uri:
+                names.add(base_type_name(u))
+        else:
+            names.add(base_type_name(uri))
+    return names
+
+
+def anchor_category(type_name: str) -> str | None:
+    """Classify a base type name as an anchor category.
+
+    :returns: ``'temporal'``, ``'image'``, ``'textual'``,
+              ``'document'``, or ``None`` if not an anchor.
+    """
+    if type_name in TEMPORAL_ANCHORS:
+        return 'temporal'
+    if type_name in IMAGE_ANCHORS:
+        return 'image'
+    if type_name in TEXTUAL_ANCHORS:
+        return 'textual'
+    if type_name in DOCUMENT_TYPES:
+        return 'document'
+    return None
+
+
+def infer_output_pattern(app: dict) -> str | None:
+    """Classify an app by its output type signature.
+
+    Rules applied in priority order:
+
+    - ``TextDocument + BoundingBox``: output has both
+    - ``TextDocument + TimeFrame``: output has both; or TextDocument
+      with audio input; or TimeFrame output with text + audio inputs
+      (forced alignment)
+    - ``TextDocument from visual input``: output has TextDocument (no BB),
+      input includes visual media but not pre-located BoundingBox
+    - ``NamedEntity``: output has NamedEntity
+    - ``BoundingBox`` (no TextDocument): output has BoundingBox only
+    - ``TimeFrame``: output has TimeFrame (no TextDocument)
+    - ``TimePoint``: output has TimePoint only
+
+    :returns: Pattern string from ``OUTPUT_PATTERN_ORDER``, or ``None``.
+    """
+    out = _flatten_type_names(app.get('output', []))
+    inp = _flatten_type_names(app.get('input', []))
+
+    def has_out(*types): return any(t in out for t in types)
+    def has_inp(*types): return any(t in inp for t in types)
+
+    if has_out('TextDocument') and has_out('BoundingBox'):
+        return 'TextDocument + BoundingBox (+ Alignment)'
+    if has_out('TextDocument') and has_out('TimeFrame'):
+        return 'TextDocument + TimeFrame (+ Alignment)'
+    if has_out('TextDocument'):
+        if has_inp('AudioDocument'):
+            return 'TextDocument + TimeFrame (+ Alignment)'
+        # recognition-only: consumes pre-located BB regions
+        if has_inp('BoundingBox'):
+            return None
+        # visual media input → TD from visual source, no BB
+        if has_inp('VideoDocument') or has_inp('ImageDocument'):
+            return 'TextDocument from visual input (no BoundingBox)'
+        return None  # text-processing utility or unclassified
+    if has_out('NamedEntity'):
+        return 'NamedEntity'
+    if has_out('BoundingBox'):
+        return 'BoundingBox (no TextDocument)'
+    # Forced alignment: TimeFrame output + text + audio inputs
+    if (has_out('TimeFrame')
+            and has_inp('TextDocument')
+            and AUDIO_CAPABLE & inp):
+        return 'TextDocument + TimeFrame (+ Alignment)'
+    if has_out('TimeFrame'):
+        return 'TimeFrame'
+    if has_out('TimePoint'):
+        return 'TimePoint'
+    return None
+
+
+# ── alignment description parsing ────────────────────────────────────
+
+def _parse_alignment_pairs(desc: str) -> list[tuple[str, str]]:
+    """Extract ``(left_type, right_type)`` pairs from a free-text
+    Alignment ``description`` field.
+
+    Handles numbered-list format::
+
+        1) `A` <-> `B`/`C`, 2) `D` <-> `E`
+
+    and prose format::
+
+        Alignment between `A` and `B` annotations.
+    """
+    pairs = []
+    clauses = re.split(r'\d+\)', desc)
+    for clause in clauses:
+        halves = re.split(r'<->|↔|\band\b', clause, maxsplit=1)
+        if len(halves) < 2:
+            continue
+        left_types = re.findall(r'`([^`]+)`', halves[0])
+        right_types = re.findall(r'`([^`]+)`', halves[1])
+        if not left_types or not right_types:
+            continue
+        for ltype in left_types:
+            for rtype in right_types:
+                pairs.append((ltype.strip(), rtype.strip()))
+    return pairs
+
+
+def _is_alignment(o: dict) -> bool:
+    return isinstance(o, dict) and 'lignment' in o.get('@type', '')
+
+
+def alignment_anchor_info(
+        app: dict) -> tuple[list[tuple[str, str]], str]:
+    """Return ``(pairs, source)`` describing an app's Alignment output.
+
+    ``pairs`` is a list of ``(left_type, right_type)`` base-name strings.
+    ``source`` is one of ``'described'``, ``'inferred'``,
+    ``'underdeclared'``, or ``'n/a'``.
+
+    Source precedence:
+
+    - ``described``: extracted from the ``description`` field
+    - ``inferred``: deduced from co-occurring anchor types in output
+    - ``underdeclared``: Alignment present but insufficient anchor
+      information to determine pairing
+    """
+    align_entry = next(
+        (o for o in app.get('output', []) if _is_alignment(o)), None)
+    if align_entry is None:
+        return [], 'n/a'
+
+    desc = align_entry.get('description', '')
+    if desc:
+        pairs = _parse_alignment_pairs(desc)
+        if pairs:
+            return pairs, 'described'
+
+    # Infer from co-occurring anchor types in output
+    by_cat: dict[str, list[str]] = defaultdict(list)
+    for o in app.get('output', []):
+        if o is align_entry:
+            continue
+        n = base_type_name(extract_type_uri(o)) if isinstance(o, dict) \
+            else str(o)
+        cat = anchor_category(n)
+        if cat and n not in by_cat[cat]:
+            by_cat[cat].append(n)
+
+    cats = sorted(by_cat)
+    if len(cats) >= 2:
+        pairs = []
+        for i, c1 in enumerate(cats):
+            for c2 in cats[i + 1:]:
+                for n1 in by_cat[c1]:
+                    for n2 in by_cat[c2]:
+                        pairs.append((n1, n2))
+        return pairs, 'inferred'
+    if len(cats) == 1:
+        return [(n, '?') for n in by_cat[cats[0]]], 'underdeclared'
+    return [], 'underdeclared'
+
+
+def _pair_classification(left: str, right: str) -> str:
+    """Classify an alignment pair by anchor categories."""
+    lcat = anchor_category(left)
+    rcat = anchor_category(right) if right != '?' else None
+    if right == '?' or rcat is None:
+        lc = lcat or 'unknown'
+        return f'single-anchor ({lc}) ⚠'
+    if lcat is None or rcat is None:
+        return 'other'
+    if lcat == rcat:
+        return f'same-modal ({lcat}) ⚠'
+    cats = tuple(sorted([lcat, rcat]))
+    return f'{cats[0]}↔{cats[1]}'
+
+
+# ── data loading ─────────────────────────────────────────────────────
+
+def load_latest_apps() -> list[dict]:
+    """Walk ``docs/_apps/*/v*/metadata.json`` and return latest version
+    metadata for each app."""
+    apps = defaultdict(list)
+    for metadata_f in APPS_DIR.glob('*/v*/metadata.json'):
+        with open(metadata_f) as fh:
+            meta = json.load(fh)
+        app_dir_name = metadata_f.parent.parent.name
+        apps[app_dir_name].append(meta)
+
+    latest = []
+    for app_dir_name, versions in sorted(apps.items()):
+        versions.sort(key=lambda m: version_sort_key(m.get('app_version', '')))
+        latest.append(versions[-1])       # highest version
+    return latest
+
+
+# ── report section generators ─────────────────────────────────────────
+
+def _pattern_section(apps: list[dict]) -> list[str]:
+    """Return lines for the output type pattern grouping section."""
+    by_pattern: dict[str, list[dict]] = defaultdict(list)
+    for app in apps:
+        key = infer_output_pattern(app) or 'Unclassified'
+        by_pattern[key].append(app)
+
+    lines: list[str] = []
+    w = lines.append
+    w('## Apps Grouped by Output Type Pattern')
+    w('')
+    w('Grouped by the combination of output annotation types, '
+      'inferred from declared I/O metadata.')
+    w('')
+    for pattern in OUTPUT_PATTERN_ORDER:
+        if pattern not in by_pattern:
+            continue
+        group = sorted(
+            by_pattern[pattern], key=lambda a: a.get('name', '').lower())
+        w(f'### {pattern} ({len(group)})')
+        w('')
+        w('| App | Version | Output Types |')
+        w('|-----|---------|--------------|')
+        for app in group:
+            name = app.get('name', '?')
+            ver = app.get('app_version', '?')
+            outputs = _format_io_list(app.get('output', []), 'output')
+            w(f'| {name} | {ver} | {outputs} |')
+        w('')
+    return lines
+
+
+def _anchor_section(apps: list[dict]) -> list[str]:
+    """Return lines for the anchor type and Alignment analysis section."""
+    lines: list[str] = []
+    w = lines.append
+    w('## Anchor Type and Alignment Analysis')
+    w('')
+    w('Anchor types establish coordinate systems that other annotations '
+      'reference. Temporal anchors: `TimeFrame`, `TimePoint`. '
+      'Image anchors: `BoundingBox`. '
+      'Textual anchors: LAPPS `Span` subtypes '
+      '(`Token`, `Sentence`, `Paragraph`, `NamedEntity`, …).')
+    w('')
+
+    w('### Anchor Types Produced per App')
+    w('')
+    w('| App | Temporal | Image | Textual | Alignment |')
+    w('|-----|----------|-------|---------|-----------|')
+    for app in sorted(apps, key=lambda a: a.get('name', '').lower()):
+        name = app.get('name', '?')
+        temporal, image, textual = [], [], []
+        has_align = False
+        for o in app.get('output', []):
+            if _is_alignment(o):
+                has_align = True
+                continue
+            n = base_type_name(extract_type_uri(o)) \
+                if isinstance(o, dict) else str(o)
+            cat = anchor_category(n)
+            if cat == 'temporal':
+                temporal.append(n)
+            elif cat == 'image':
+                image.append(n)
+            elif cat == 'textual' and n not in textual:
+                textual.append(n)
+        t = ', '.join(temporal) or '—'
+        i = ', '.join(image) or '—'
+        x = ', '.join(sorted(textual)) or '—'
+        a = 'yes' if has_align else '—'
+        w(f'| {name} | {t} | {i} | {x} | {a} |')
+    w('')
+
+    w('### Alignment Anchor Pairs')
+    w('')
+    w('Source/target anchor pairs extracted from the `description` '
+      'metadata field (*described*) or inferred from co-occurring '
+      'anchor types in the same output list (*inferred*). '
+      '*Underdeclared*: `Alignment` is declared but anchor types '
+      'are insufficient to determine pairing.')
+    w('')
+    w('| App | Left | Right | Classification | Source |')
+    w('|-----|------|-------|----------------|--------|')
+    for app in sorted(apps, key=lambda a: a.get('name', '').lower()):
+        pairs, source = alignment_anchor_info(app)
+        if source == 'n/a':
+            continue
+        name = app.get('name', '?')
+        if not pairs:
+            w(f'| {name} | — | — | underdeclared | — |')
+            continue
+        for left, right in pairs:
+            cls = _pair_classification(left, right)
+            w(f'| {name} | `{left}` | `{right}` | {cls} | {source} |')
+    w('')
+    return lines
+
+
+# ── report generation ────────────────────────────────────────────────
+
+def build_io_maps(apps: list[dict]):
+    """Return dicts mapping base‑type‑name -> list of app dicts, for both
+    input and output types."""
+    out_map: dict[str, list[dict]] = defaultdict(list)
+    in_map: dict[str, list[dict]] = defaultdict(list)
+
+    for app in apps:
+        for o in app.get('output', []):
+            uri = extract_type_uri(o)
+            name = base_type_name(uri)
+            out_map[name].append(app)
+
+        for i in app.get('input', []):
+            uri = extract_type_uri(i)
+            name = base_type_name(uri)
+            in_map[name].append(app)
+
+    return out_map, in_map
+
+
+def _format_io_list(items, direction: str) -> str:
+    """Return a comma‑separated list of base type names for one app's
+    input or output list."""
+    names = []
+    for item in items:
+        uri = extract_type_uri(item)
+        names.append(base_type_name(uri))
+    return ', '.join(names) if names else '(none)'
+
+
+def generate_report(apps: list[dict]) -> str:
+    out_map, in_map = build_io_maps(apps)
+
+    lines: list[str] = []
+    w = lines.append          # shorthand
+
+    w('# CLAMS App I/O Types Report')
+    w('')
+    w(f'*Auto‑generated by `scripts/io_type_report.py` on '
+      f'{datetime.now().strftime("%Y-%m-%d")}*')
+    w('')
+    w('This report lists the input/output annotation types declared by the '
+      'latest version of every published CLAMS app and groups apps that share '
+      'the same types.')
+    w('')
+
+    # ── summary ──────────────────────────────────────────────────────
+    w('## Summary')
+    w('')
+    w(f'| Metric | Count |')
+    w(f'|--------|------:|')
+    w(f'| Published apps (latest versions) | {len(apps)} |')
+    w(f'| Distinct output types | {len(out_map)} |')
+    w(f'| Distinct input types | {len(in_map)} |')
+    w('')
+
+    # ── proposed pattern grouping ─────────────────────────────────────
+    lines.extend(_pattern_section(apps))
+
+    # ── grouped by output type ───────────────────────────────────────
+    w('## Apps Grouped by Shared Output Type')
+    w('')
+
+    for type_name, type_apps in sorted(out_map.items(),
+                                       key=lambda kv: (-len(kv[1]), kv[0])):
+        # collect a sample URI so we can show the vocab source
+        sample_uri = None
+        for app in type_apps:
+            for o in app.get('output', []):
+                uri = extract_type_uri(o)
+                if base_type_name(uri) == type_name:
+                    sample_uri = uri
+                    break
+            if sample_uri:
+                break
+        source_tag = f' ({vocab_source(sample_uri)})' if sample_uri else ''
+
+        w(f'### `{type_name}`{source_tag} — {len(type_apps)} app(s)')
+        w('')
+        w('| App | Version | Inputs |')
+        w('|-----|---------|--------|')
+        for app in sorted(type_apps, key=lambda a: a.get('name', '').lower()):
+            name = app.get('name', '?')
+            ver = app.get('app_version', '?')
+            inputs = _format_io_list(app.get('input', []), 'input')
+            w(f'| {name} | {ver} | {inputs} |')
+        w('')
+
+    # ── grouped by input type ────────────────────────────────────────
+    w('## Apps Grouped by Shared Input Type')
+    w('')
+
+    for type_name, type_apps in sorted(in_map.items(),
+                                       key=lambda kv: (-len(kv[1]), kv[0])):
+        sample_uri = None
+        for app in type_apps:
+            for i in app.get('input', []):
+                uri = extract_type_uri(i)
+                if base_type_name(uri) == type_name:
+                    sample_uri = uri
+                    break
+            if sample_uri:
+                break
+        source_tag = f' ({vocab_source(sample_uri)})' if sample_uri else ''
+
+        w(f'### `{type_name}`{source_tag} — {len(type_apps)} app(s)')
+        w('')
+        w('| App | Version | Outputs |')
+        w('|-----|---------|---------|')
+        for app in sorted(type_apps, key=lambda a: a.get('name', '').lower()):
+            name = app.get('name', '?')
+            ver = app.get('app_version', '?')
+            outputs = _format_io_list(app.get('output', []), 'output')
+            w(f'| {name} | {ver} | {outputs} |')
+        w('')
+
+    # ── full reference table ─────────────────────────────────────────
+    w('## Full App I/O Reference')
+    w('')
+    w('| App | Version | Output Pattern | Input Types | Output Types |')
+    w('|-----|---------|----------------|-------------|--------------|')
+    for app in sorted(apps, key=lambda a: a.get('name', '').lower()):
+        name = app.get('name', '?')
+        ver = app.get('app_version', '?')
+        pattern = infer_output_pattern(app) or '—'
+        inputs = _format_io_list(app.get('input', []), 'input')
+        outputs = _format_io_list(app.get('output', []), 'output')
+        w(f'| {name} | {ver} | {pattern} | {inputs} | {outputs} |')
+    w('')
+
+    # ── anchor type and alignment analysis ───────────────────────────
+    lines.extend(_anchor_section(apps))
+
+    return '\n'.join(lines)
+
+
+# ── main ─────────────────────────────────────────────────────────────
+
+def main():
+    import sys
+    latest_apps = load_latest_apps()
+    print(f'Loaded {len(latest_apps)} apps (latest versions) '
+          f'from {APPS_DIR}', file=sys.stderr)
+
+    print(generate_report(latest_apps))
+
+
+if __name__ == '__main__':
+    main()