From 398eb21277ad7783d5ac05818711d0f21d437316 Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Sat, 14 Feb 2026 17:09:29 -0500
Subject: [PATCH 1/3] Updated `mmif describe` implementation to be based on
 pydantic for better documentation

---
 build-tools/requirements.docs.txt |   3 +-
 documentation/conf.py             |   9 +
 mmif/utils/cli/__init__.py        |   4 +-
 mmif/utils/cli/describe.py        |  88 ++++---
 mmif/utils/workflow_helper.py     | 414 ++++++++++++++++++------------
 requirements.txt                  |   1 +
 6 files changed, 312 insertions(+), 207 deletions(-)

diff --git a/build-tools/requirements.docs.txt b/build-tools/requirements.docs.txt
index 8d9ee33d..db2d03d8 100644
--- a/build-tools/requirements.docs.txt
+++ b/build-tools/requirements.docs.txt
@@ -1,3 +1,4 @@
-sphinx>=7.0,<8.0
+sphinx
 furo
 m2r2
+autodoc-pydantic
diff --git a/documentation/conf.py b/documentation/conf.py
index 121054ef..f309f548 100644
--- a/documentation/conf.py
+++ b/documentation/conf.py
@@ -33,6 +33,7 @@
     'undoc-members': True,
     'show-inheritance': True,
 }
+autodoc_member_order = 'bysource'
 
 
 # -- Project information -----------------------------------------------------
@@ -55,8 +56,16 @@
     'sphinx.ext.autodoc',
     'sphinx.ext.linkcode',
     'm2r2',
+    'sphinxcontrib.autodoc_pydantic',
 ]
 
+autodoc_pydantic_model_show_json = True
+autodoc_pydantic_model_show_field_summary = True
+autodoc_pydantic_model_show_config_summary = False
+autodoc_pydantic_model_show_validator_members = False
+autodoc_pydantic_model_show_validator_summary = False
+autodoc_pydantic_field_list_validators = False
+
 templates_path = ['_templates']
 exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 # dynamically generated files
diff --git a/mmif/utils/cli/__init__.py b/mmif/utils/cli/__init__.py
index 9b91b60c..935ab0a7 100644
--- a/mmif/utils/cli/__init__.py
+++ b/mmif/utils/cli/__init__.py
@@ -28,6 +28,7 @@ def open_cli_io_arg(path_or_dash: Optional[str],
     manager.
 
     Handles the common CLI pattern where:
+
     - '-' means stdin (read mode) or stdout (write mode)
     - None means "argument not provided"; when default_stdin=True, it falls back
       to stdin/stdout
@@ -117,7 +118,8 @@ def open_cli_io_arg(path_or_dash: Optional[str],
                 "Expected str or None."
             )
 
-        yield file_handle
+        if file_handle is not None:
+            yield file_handle
 
     finally:
         if should_close and file_handle is not None:
diff --git a/mmif/utils/cli/describe.py b/mmif/utils/cli/describe.py
index 0bbd49a8..d921b329 100644
--- a/mmif/utils/cli/describe.py
+++ b/mmif/utils/cli/describe.py
@@ -1,17 +1,29 @@
 import argparse
 import json
-import os
 import sys
 import textwrap
 from pathlib import Path
-from typing import Union, cast
+from typing import Dict, Type, Union, cast
+
+from pydantic import BaseModel
 
 from mmif.utils.cli import open_cli_io_arg
-from mmif.utils.workflow_helper import generate_workflow_identifier, describe_single_mmif, \
-    describe_mmif_collection
-# gen_param_hash is imported for backward compatibility
-from mmif.utils.workflow_helper import generate_param_hash
 
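+# NOTE(review): generate_param_hash (formerly re-exported here for backward compatibility) is not in the import list below; restore it if callers still need it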
+from mmif.utils.workflow_helper import (
+    CollectionMmifDesc,
+    SingleMmifDesc,
+    describe_mmif_collection,
+    describe_single_mmif,
+    generate_workflow_identifier,
+)
+
+models_to_help = [SingleMmifDesc, CollectionMmifDesc]
+model_modules = set(model.__module__ for model in models_to_help)
+def get_all_models() -> Dict[str, Type[BaseModel]]:
+    """Return the models listed in ``models_to_help``, keyed by class name."""
+    # models_to_help holds classes (not (name, cls) pairs), so key by __name__
+    return {m.__name__: m for m in models_to_help}
 
 def get_pipeline_specs(mmif_file: Union[str, Path]):
     import warnings
@@ -33,30 +45,11 @@ def describe_argparser():
         'collection of MMIF files.'
     )
 
-    # get and clean docstrings
-    def _extract_describe_docstring(func):
-        doc = func.__doc__.split(':param')[0]
-        # then cut off all lines after `---`
-        doc = doc.split('---')[0]
-        return textwrap.dedent(doc).strip()
-
-    single_doc = _extract_describe_docstring(describe_single_mmif)
-    collection_doc = _extract_describe_docstring(describe_mmif_collection)
-
     additional = textwrap.dedent(f"""
     This command extracts workflow information from a single MMIF file or 
-    summarizes a directory of MMIF files. The output is serialized as JSON and 
-    includes:
+    a directory of MMIF files. The output is serialized as JSON.
     
-    =========================
-    Single MMIF file as input
-    =========================
-{single_doc}
-
-    ==================================
-    A directory of MMIF files as input
-    ==================================
-{collection_doc}
+    Use `--help-schemas` to inspect the structure of the JSON output.
     """)
     return oneliner, additional
 
@@ -67,6 +60,7 @@ def prep_argparser(**kwargs):
         formatter_class=argparse.RawDescriptionHelpFormatter,
         **kwargs
     )
+    
     parser.add_argument(
         "MMIF_FILE",
         nargs="?",
@@ -84,24 +78,43 @@ def prep_argparser(**kwargs):
         action="store_true",
         help="Pretty-print JSON output"
     )
+    parser.add_argument(
+        "--help-schemas",
+        nargs="*",
+        choices=["all"] + [m.__name__ for m in models_to_help],
+        metavar="SCHEMA_NAME",
+        help=f"Print the JSON schema for the output. For human-readable documentation, "
+             f"visit https://clams.ai/mmif-python and see the following modules: "
+             f"{', '.join(model_modules)}.\nOptions: all, {', '.join([m.__name__ for m in models_to_help])}."
+    )
     return parser
 
 
 def main(args):
     """
-    Main entry point for the describe CLI command.
-
-    Reads a MMIF file and outputs a JSON summary containing:
-    
-    - workflow_id: unique identifier for the source and app sequence
-    - stats: view counts, annotation counts (total/per-view/per-type), and lists of error/warning/empty view IDs
-    - views: map of view IDs to app configurations and profiling data
-
-    :param args: Parsed command-line arguments
+    Main block for the describe CLI command.
+    This function basically works as a wrapper around
+    :func:`describe_single_mmif` (for single file input) or 
+    :func:`describe_mmif_collection` (for directory input).
     """
+    if hasattr(args, 'help_schemas') and args.help_schemas is not None:
+        models_map = {m.__name__: m for m in models_to_help}
+        to_show = []
+        if len(args.help_schemas) == 0 or 'all' in args.help_schemas:
+            to_show = models_to_help
+        else:
+            to_show = args.help_schemas
+        
+        for name in to_show:
+            model_cls = models_map[name]
+            schema = model_cls.model_json_schema()
+            print(json.dumps(schema, indent=2))
+            print()
+        sys.exit(0)
+
     output = {}
     # if input is a directory
-    if isinstance(args.MMIF_FILE, (str, os.PathLike)) and Path(args.MMIF_FILE).is_dir():
+    if Path(str(args.MMIF_FILE)).is_dir():
         output = describe_mmif_collection(args.MMIF_FILE)
     # if input is a file or stdin
     else:
@@ -125,6 +138,7 @@ def main(args):
                 tmp_path.unlink()
 
     if output:
+        # `output` is already a plain dict here: both describe_* helpers return model_dump(by_alias=True)
         with open_cli_io_arg(args.output, 'w', default_stdin=True) as output_file:
             json.dump(output, output_file, indent=2 if args.pretty else None)
             output_file.write('\n')
diff --git a/mmif/utils/workflow_helper.py b/mmif/utils/workflow_helper.py
index c73c0cd2..bdde664a 100644
--- a/mmif/utils/workflow_helper.py
+++ b/mmif/utils/workflow_helper.py
@@ -1,13 +1,16 @@
 import datetime
 import hashlib
-from collections import Counter, defaultdict
-from pathlib import Path
-from typing import List, Any, Tuple, Optional, Union
 import itertools
-from mmif import Mmif
+from collections import Counter
+from pathlib import Path
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union, overload
+
+from pydantic import BaseModel, ConfigDict, Field
 
+from mmif.serialize.mmif import Mmif, ViewsList
 
-def group_views_by_app(views: List[Any]) -> List[List[Any]]:
+
+def group_views_by_app(views: ViewsList) -> List[List[Any]]:
     """
     Groups views into app executions based on app and timestamp.
 
@@ -93,9 +96,21 @@ def _read_mmif_from_path(mmif_input: Union[str, Path, Mmif]) -> Mmif:
         )
 
 
+@overload
+def generate_workflow_identifier(mmif_input: Union[str, Path, Mmif], 
+                                 return_param_dicts: Literal[True]
+                                 ) -> Tuple[str, List[dict]]: ...
+
+
+@overload
 def generate_workflow_identifier(mmif_input: Union[str, Path, Mmif],
-                                 return_param_dicts=False) \
-        -> Union[str, Tuple[str, List[dict]]]:
+                                 return_param_dicts: Literal[False] = False
+                                 ) -> str: ...
+
+
+def generate_workflow_identifier(mmif_input: Union[str, Path, Mmif],
+                                  return_param_dicts: bool = False
+                                  ) -> Union[str, Tuple[str, List[dict]]]:
     """
     Generate a workflow identifier string from a MMIF file or object.
 
@@ -149,7 +164,53 @@ def generate_workflow_identifier(mmif_input: Union[str, Path, Mmif],
     return '/'.join(segments)
 
 
-def _get_profile_data(view) -> dict:
+## single MMIF summarization 
+
+class SingleMmifStats(BaseModel):
+    """
+    Aggregated statistics for a single MMIF file.
+    """
+    model_config = ConfigDict(populate_by_name=True)
+    
+    app_count: int = Field(..., alias="appCount", description="Total number of app executions identified.")
+    error_views: List[str] = Field(default_factory=list, alias="errorViews", description="List of view IDs that contain errors.")
+    warning_views: List[str] = Field(default_factory=list, alias="warningViews", description="List of view IDs that contain warnings.")
+    empty_views: List[str] = Field(default_factory=list, alias="emptyViews", description="List of view IDs that contain no annotations.")
+    annotation_count_by_type: Dict[str, int] = Field(default_factory=dict, alias="annotationCountByType", description="Total annotation counts across the file.")
+
+class AppProfiling(BaseModel):
+    """
+    Profiling data for a single app execution.
+    """
+    model_config = ConfigDict(populate_by_name=True)
+    
+    running_time_ms: Optional[int] = Field(default=None, alias="runningTimeMS", description="Execution time in milliseconds.")
+
+class AppExecution(BaseModel):
+    """
+    Represents a single execution of an app, which may produce multiple views.
+    """
+    model_config = ConfigDict(populate_by_name=True)
+    
+    app: str = Field(..., description="The URI of the app.")
+    view_ids: List[str] = Field(..., alias="viewIds", description="List of view IDs generated by this execution.")
+    app_configuration: Dict = Field(default_factory=dict, alias="appConfiguration", description="Configuration parameters used for this execution.")
+    app_profiling: AppProfiling = Field(default_factory=lambda: AppProfiling(), alias="appProfiling", description="Profiling data for this execution.")
+    annotation_count_by_type: Dict[str, int] = Field(default_factory=dict, alias="annotationCountByType", description="Counts of annotations produced, grouped by type.")
+
+
+class SingleMmifDesc(BaseModel):
+    """
+    Description of a workflow extracted from a single MMIF file.
+    """
+    model_config = ConfigDict(populate_by_name=True)
+    
+    workflow_id: str = Field(..., alias="workflowId", description="Unique identifier for the workflow structure.")
+    stats: SingleMmifStats = Field(..., description="Statistics about the views and annotations.")
+    apps: List[AppExecution] = Field(..., description="Sequence of app executions in the workflow.")
+
+
+def _get_profile_data(view) -> AppProfiling:
     """
     Extract profiling data from a view's metadata.
 
@@ -168,13 +229,13 @@ def _get_profile_data(view) -> dict:
         running_time_str = profiling.get("runningTime")
 
     if running_time_str is None:
-        return {}
+        return AppProfiling(runningTimeMS=None)
 
     # the format is datetime.timedelta string, e.g. '0:00:02.345678'
     # need to convert to milliseconds integer
     time_obj = datetime.datetime.strptime(running_time_str, "%H:%M:%S.%f").time()
     milliseconds = (time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second) * 1000 + time_obj.microsecond // 1000
-    return {"runningTimeMS": milliseconds}
+    return AppProfiling(runningTimeMS=milliseconds)
 
 
 def describe_single_mmif(mmif_input: Union[str, Path, Mmif]) -> dict:
@@ -188,53 +249,24 @@ def describe_single_mmif(mmif_input: Union[str, Path, Mmif]) -> dict:
     a single logical "app execution".
 
     .. note::
-        For MMIF files generated by ``clams-python`` <= 1.3.3, all views
-        are independently timestamped. This means that even if multiple views
-        were generated by a single execution of an app, their
+        For MMIF files generated by apps based on ``clams-python`` <= 1.3.3, all 
+        views are independently timestamped. This means that even if multiple 
+        views were generated by a single execution of an app, their
         ``metadata.timestamp`` values will be unique. As a result, the grouping
         logic will treat each view as a separate app execution. The change
         that aligns timestamps for views from a single app execution is
         implemented in `clams-python PR #271
         <https://github.com/clamsproject/clams-python/pull/271>`_.
 
-    The output format is a dictionary with the following keys:
-
-    * ``workflowId``
-        A unique identifier for the workflow, based on the
-        sequence of app executions (app, version, parameter hashes). App
-        executions with errors are excluded from this identifier. App
-        executions with warnings are still considered successful for the purpose
-        of this identifier.
-    * ``stats``
-        A dictionary with the following keys:
-
-      * ``appCount``
-            Total number of identified app executions.
-      * ``errorViews``
-            A list of view IDs that reported errors.
-      * ``warningViews``
-            A list of view IDs that reported warnings.
-      * ``emptyViews``
-            A list of view IDs that contain no annotations.
-      * ``annotationCountByType``
-            A dictionary mapping each annotation type to its count, plus a
-            ``total`` key for the sum of all annotations across all app
-            executions.
-    * ``apps``
-        A list of objects, where each object represents one app
-        execution. It includes metadata, profiling, and aggregated statistics
-        for all views generated by that execution. A special entry for views
-        that could not be assigned to an execution will be at the end of the list.
-
-    ---
-    The docstring above is used to generate help messages for the CLI command.
-    Do not remove the triple-dashed lines.
+    The output is a serialized :class:`~SingleMmifDesc` object.
 
+    .. pydantic_model:: SingleMmifDesc
+       :noindex:
+    
     :param mmif_input: Path to MMIF file (str or Path) or a Mmif object
     :return: A dictionary containing the workflow specification.
     """
     mmif = _read_mmif_from_path(mmif_input)
-    workflow_id = generate_workflow_identifier(mmif)
 
     error_view_ids = []
     warning_view_ids = []
@@ -259,17 +291,21 @@ def describe_single_mmif(mmif_input: Union[str, Path, Mmif]) -> dict:
         execution_view_ids = [v.id for v in group]
         processed_view_ids.update(execution_view_ids)
 
-        app_data = {
-            "app": first_view.metadata.app,
-            "viewIds": execution_view_ids,
-            "appConfiguration": first_view.metadata.get("appConfiguration", {}),
-            "appProfiling": _get_profile_data(first_view),
-        }
+        # Prepare annotation counts
         total_annotations_in_exec = sum(execution_ann_counter.values())
         if total_annotations_in_exec > 0:
-            app_data['annotationCountByType'] = dict(execution_ann_counter)
-            app_data['annotationCountByType']['total'] = total_annotations_in_exec
-        grouped_apps.append(app_data)
+            count_dict = dict(execution_ann_counter)
+            count_dict['total'] = total_annotations_in_exec
+        else:
+            count_dict = {}
+        
+        grouped_apps.append(AppExecution(
+            app=first_view.metadata.app,
+            viewIds=execution_view_ids,
+            appConfiguration=first_view.metadata.get("appConfiguration", {}),
+            appProfiling=_get_profile_data(first_view),
+            annotationCountByType=count_dict
+        ))
 
     # Handle unassigned and problematic views
     all_view_ids = set(v.id for v in mmif.views)
@@ -289,19 +325,23 @@ def describe_single_mmif(mmif_input: Union[str, Path, Mmif]) -> dict:
     app_count = len(grouped_apps)
 
     if unassigned_view_ids:
-        grouped_apps.append({
-            "app": "http://apps.clams.ai/non-existing-app/v1",
-            "viewIds": sorted(list(unassigned_view_ids))
-        })
+        grouped_apps.append(AppExecution(
+            app="http://apps.clams.ai/non-existing-app/v1",
+            viewIds=sorted(list(unassigned_view_ids)),
+            appConfiguration={},
+            appProfiling=AppProfiling(runningTimeMS=None),
+            annotationCountByType={}
+        ))
 
     # aggregate total annotation counts
     total_annotations_by_type = Counter()
     for execution in grouped_apps:
         # Only aggregate from actual apps, not the special unassigned entry
-        if execution.get('app') != "http://apps.clams.ai/non-existing-app/v1":
-            if 'annotationCountByType' in execution:
-                exec_counts = execution['annotationCountByType'].copy()
-                del exec_counts['total']
+        if execution.app != "http://apps.clams.ai/non-existing-app/v1":
+            if execution.annotation_count_by_type:
+                exec_counts = execution.annotation_count_by_type.copy()
+                if 'total' in exec_counts:
+                    del exec_counts['total']
                 total_annotations_by_type.update(Counter(exec_counts))
 
     final_total_annotations = sum(total_annotations_by_type.values())
@@ -309,17 +349,79 @@ def describe_single_mmif(mmif_input: Union[str, Path, Mmif]) -> dict:
     if final_total_annotations > 0:
         final_annotation_counts['total'] = final_total_annotations
 
-    return {
-        "workflowId": workflow_id,
-        "stats": {
-            "appCount": app_count,
-            "errorViews": error_view_ids,
-            "warningViews": warning_view_ids,
-            "emptyViews": empty_view_ids,
-            "annotationCountByType": final_annotation_counts
-        },
-        "apps": grouped_apps
-    }
+    return SingleMmifDesc(
+        workflowId=generate_workflow_identifier(mmif, return_param_dicts=False),
+        stats=SingleMmifStats(
+            appCount=app_count,
+            errorViews=error_view_ids,
+            warningViews=warning_view_ids,
+            emptyViews=empty_view_ids,
+            annotationCountByType=final_annotation_counts
+        ),
+        apps=grouped_apps
+    ).model_dump(by_alias=True)
+
+
+## MMIF collection summarization 
+
+class AppProfilingStats(BaseModel):
+    """
+    Aggregated profiling statistics for an app across a workflow.
+    """
+    model_config = ConfigDict(populate_by_name=True)
+    
+    avg_running_time_ms: Optional[float] = Field(default=None, alias="avgRunningTimeMS", description="Average execution time in milliseconds.")
+    min_running_time_ms: Optional[float] = Field(default=None, alias="minRunningTimeMS", description="Minimum execution time in milliseconds.")
+    max_running_time_ms: Optional[float] = Field(default=None, alias="maxRunningTimeMS", description="Maximum execution time in milliseconds.")
+    stdev_running_time_ms: Optional[float] = Field(default=None, alias="stdevRunningTimeMS", description="Standard deviation of execution time.")
+
+
+
+
+class WorkflowAppExecution(BaseModel):
+    """
+    Aggregated information about an app's usage within a specific workflow across multiple files.
+    """
+    model_config = ConfigDict(populate_by_name=True)
+    
+    app: str = Field(..., description="The URI of the app.")
+    app_configuration: Dict = Field(default_factory=dict, alias="appConfiguration", description="Representative configuration (usually from the first occurrence).")
+    app_profiling: AppProfilingStats = Field(default_factory=lambda: AppProfilingStats(), alias="appProfiling", description="Aggregated profiling statistics.")
+
+
+class WorkflowCollectionEntry(BaseModel):
+    """
+    Summary of a unique workflow found within a collection.
+    """
+    model_config = ConfigDict(populate_by_name=True)
+    
+    workflow_id: str = Field(..., alias="workflowId", description="Unique identifier for the workflow.")
+    mmifs: List[str] = Field(..., description="List of filenames belonging to this workflow.")
+    mmif_count: int = Field(..., alias="mmifCount", description="Number of MMIF files matching this workflow.")
+    apps: List[WorkflowAppExecution] = Field(..., description="Sequence of apps in this workflow with aggregated stats.")
+
+class MmifCountByStatus(BaseModel):
+    """
+    Breakdown of MMIF files in a collection by their processing status.
+    """
+    model_config = ConfigDict(populate_by_name=True)
+    
+    total: int = Field(..., description="Total number of MMIF files found.")
+    successful: int = Field(..., description="Number of files processed without errors.")
+    with_errors: int = Field(..., alias="withErrors", description="Number of files containing error views.")
+    with_warnings: int = Field(..., alias="withWarnings", description="Number of files containing warning views.")
+    invalid: int = Field(..., description="Number of files that failed to parse as valid MMIF.")
+
+
+class CollectionMmifDesc(BaseModel):
+    """
+    Summary of a collection of MMIF files.
+    """
+    model_config = ConfigDict(populate_by_name=True)
+    
+    mmif_count_by_status: MmifCountByStatus = Field(..., alias="mmifCountByStatus", description="Counts of MMIF files by status.")
+    workflows: List[WorkflowCollectionEntry] = Field(..., description="List of unique workflows identified in the collection.")
+    annotation_count_by_type: Dict[str, int] = Field(default_factory=dict, alias="annotationCountByType", description="Total annotation counts across the entire collection.")
 
 
 def describe_mmif_collection(mmif_dir: Union[str, Path]) -> dict:
@@ -329,139 +431,115 @@ def describe_mmif_collection(mmif_dir: Union[str, Path]) -> dict:
     This function provides an overview of a collection of MMIF files, aggregating
     statistics across multiple files.
 
-    The output format is a dictionary with the following keys:
-
-    * ``mmifCountByStatus``
-        A dictionary summarizing the processing status of all MMIF files in the
-        collection. It includes:
-
-        ``total``
-            Total number of MMIF files found.
-        ``successful``
-            Number of MMIF files processed without errors (may contain warnings).
-        ``withErrors``
-            Number of MMIF files containing app executions that reported errors.
-        ``withWarnings``
-            Number of MMIF files containing app executions that reported warnings.
-        ``invalid``
-            Number of files that failed to be parsed as valid MMIF.
-    * ``workflows``
-        A list of "workflow" objects found in the "successful" MMIF files (files
-        with errors are excluded), where each object contains:
-
-        ``workflowId``
-            The unique identifier for the workflow.
-        ``apps``
-            A list of app objects, each with ``app`` (name+ver identifier),
-            ``appConfiguration``, and ``appProfiling`` statistics (avg, min, max,
-            stdev running times) aggregated per workflow.
-        ``mmifs``
-            A list of MMIF file basenames belonging to this workflow.
-        ``mmifCount``
-            The number of MMIF files in this workflow.
-    * ``annotationCountByType``
-        A dictionary aggregating annotation counts across the entire collection.
-        It includes a ``total`` key for the grand total, plus integer counts for
-        each individual annotation type.
-
-    ---
-    The docstring above is used to generate help messages for the CLI command.
-    Do not remove the triple-dashed lines.
+    The output is a serialized :class:`~CollectionMmifDesc` object.
+
+    .. pydantic_model:: CollectionMmifDesc
+       :noindex:
 
     :param mmif_dir: Path to the directory containing MMIF files.
     :return: A dictionary containing the summarized collection specification.
     """
     import statistics
-    from collections import defaultdict, Counter
+    from collections import Counter
 
     mmif_files = list(Path(mmif_dir).glob('*.mmif'))
 
-    status_summary = defaultdict(int)
-    status_summary['total'] = len(mmif_files)
-    status_summary['successful'] = 0
-    status_summary['withErrors'] = 0
-    status_summary['withWarnings'] = 0
-    status_summary['invalid'] = 0
+    status_summary = MmifCountByStatus(
+        total=len(mmif_files),
+        successful=0,
+        withErrors=0,
+        withWarnings=0,
+        invalid=0
+    )
 
     aggregated_counts = Counter()
 
-    workflows_data = defaultdict(lambda: {
-        'mmifs': [],
-        'apps': defaultdict(lambda: {
-            'appConfiguration': None,  # Store the first config here
-            'execution_times': []
-        })
-    })
+    # Structure: {workflow_id: {'mmifs': [...], 'apps': {app_uri: {'appConfiguration': ..., 'execution_times': [...]}}}}
+    workflows_data: Dict[str, Dict] = {}
 
     for mmif_file in mmif_files:
         try:
-            single_report = describe_single_mmif(mmif_file)
-        except Exception as e:
-            status_summary['invalid'] += 1
+            single_report = SingleMmifDesc.model_validate(describe_single_mmif(mmif_file))
+        except Exception:
+            status_summary.invalid += 1
             continue
 
-        if single_report['stats']['errorViews']:
-            status_summary['withErrors'] += 1
+        if single_report.stats.error_views:
+            status_summary.with_errors += 1
             continue  # Exclude from all other stats
 
         # If we get here, the MMIF has no errors and is considered "successful"
-        status_summary['successful'] += 1
-        if single_report['stats']['warningViews']:
-            status_summary['withWarnings'] += 1
-
-        wf_id = single_report['workflowId']
+        status_summary.successful += 1
+        if single_report.stats.warning_views:
+            status_summary.with_warnings += 1
+
+        wf_id = single_report.workflow_id
+        # Initialize workflow entry if not exists
+        if wf_id not in workflows_data:
+            workflows_data[wf_id] = {'mmifs': [], 'apps': {}}
         workflows_data[wf_id]['mmifs'].append(Path(mmif_file).name)
 
         # Aggregate annotation counts for successful mmifs
-        report_counts = single_report['stats'].get('annotationCountByType', {})
+        report_counts = single_report.stats.annotation_count_by_type.copy()
         if 'total' in report_counts:
             del report_counts['total']  # don't add the sub-total to the main counter
         aggregated_counts.update(report_counts)
 
-        for app_exec in single_report.get('apps', []):
-            app_uri = app_exec.get('app')
+        for app_exec in single_report.apps:
+            app_uri = app_exec.app
             # skip the special "unassigned" app
             if app_uri and app_uri != "http://apps.clams.ai/non-existing-app/v1":
-                running_time = app_exec.get('appProfiling', {}).get('runningTimeMS')
+                # Initialize app entry if not exists
+                if app_uri not in workflows_data[wf_id]['apps']:
+                    workflows_data[wf_id]['apps'][app_uri] = {
+                        'appConfiguration': None,
+                        'execution_times': []
+                    }
+                
+                running_time = app_exec.app_profiling.running_time_ms
                 if running_time is not None:
                     workflows_data[wf_id]['apps'][app_uri]['execution_times'].append(running_time)
 
                 # Store the first non-empty app configuration we find for this app in this workflow
                 if workflows_data[wf_id]['apps'][app_uri]['appConfiguration'] is None:
-                    config = app_exec.get('appConfiguration', {})
+                    config = app_exec.app_configuration
                     if config:
                         workflows_data[wf_id]['apps'][app_uri]['appConfiguration'] = config
 
     # Process collected data into the final output format
     final_workflows_list = []
     for wf_id, wf_data in sorted(workflows_data.items()):
-        workflow_object = {
-            'workflowId': wf_id,
-            'mmifs': sorted(wf_data['mmifs']),
-            'mmifCount': len(wf_data['mmifs']),
-            'apps': []
-        }
+        workflow_apps = []
 
         for app_uri, app_data in sorted(wf_data['apps'].items()):
             times = app_data['execution_times']
             if times:
-                profiling_stats = {
-                    'avgRunningTimeMS': statistics.mean(times),
-                    'minRunningTimeMS': min(times),
-                    'maxRunningTimeMS': max(times),
-                    'stdevRunningTimeMS': statistics.stdev(times) if len(times) > 1 else 0
-                }
+                profiling_stats = AppProfilingStats(
+                    avgRunningTimeMS=statistics.mean(times),
+                    minRunningTimeMS=min(times),
+                    maxRunningTimeMS=max(times),
+                    stdevRunningTimeMS=statistics.stdev(times) if len(times) > 1 else 0
+                )
             else:
-                profiling_stats = {}
-
-            app_object = {
-                'app': app_uri,
-                'appConfiguration': app_data['appConfiguration'] or {},  # Default to empty dict
-                'appProfiling': profiling_stats
-            }
-            workflow_object['apps'].append(app_object)
-
-        final_workflows_list.append(workflow_object)
+                profiling_stats = AppProfilingStats(
+                    avgRunningTimeMS=None,
+                    minRunningTimeMS=None,
+                    maxRunningTimeMS=None,
+                    stdevRunningTimeMS=None
+                )
+
+            workflow_apps.append(WorkflowAppExecution(
+                app=app_uri,
+                appConfiguration=app_data['appConfiguration'] or {},
+                appProfiling=profiling_stats
+            ))
+
+        final_workflows_list.append(WorkflowCollectionEntry(
+            workflowId=wf_id,
+            mmifs=sorted(wf_data['mmifs']),
+            mmifCount=len(wf_data['mmifs']),
+            apps=workflow_apps
+        ))
 
     # Finalize annotation counts
     final_annotation_counts = dict(aggregated_counts)
@@ -469,8 +547,8 @@ def describe_mmif_collection(mmif_dir: Union[str, Path]) -> dict:
     if grand_total > 0:
         final_annotation_counts['total'] = grand_total
 
-    return {
-        'mmifCountByStatus': dict(status_summary),
-        'workflows': final_workflows_list,
-        'annotationCountByType': final_annotation_counts
-    }
+    return CollectionMmifDesc(
+        mmifCountByStatus=status_summary,
+        workflows=final_workflows_list,
+        annotationCountByType=final_annotation_counts
+    ).model_dump(by_alias=True)
diff --git a/requirements.txt b/requirements.txt
index a97c214e..c3e9d722 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 
 orderly-set==5.3.*  # 5.4 drops py38 support
 jsonschema
+pydantic>=2.0

From 8266a2edfcf9846f3d98baa3ab4e0e7c635074dc Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Sun, 15 Feb 2026 08:51:34 -0500
Subject: [PATCH 2/3] updated test cases for utils and clis

---
 mmif/utils/cli/describe.py |   2 +-
 tests/test_utils.py        | 203 +++++++++++++++++++----
 tests/test_utils_cli.py    | 328 ++++++++++++++++++-------------------
 3 files changed, 333 insertions(+), 200 deletions(-)

diff --git a/mmif/utils/cli/describe.py b/mmif/utils/cli/describe.py
index d921b329..bb226a81 100644
--- a/mmif/utils/cli/describe.py
+++ b/mmif/utils/cli/describe.py
@@ -101,7 +101,7 @@ def main(args):
         models_map = {m.__name__: m for m in models_to_help}
         to_show = []
         if len(args.help_schemas) == 0 or 'all' in args.help_schemas:
-            to_show = models_to_help
+            to_show = [m.__name__ for m in models_to_help]
         else:
             to_show = args.help_schemas
         
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 5f29b9d2..1aa4fdaf 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,17 +1,25 @@
+import json
+import os
 import pathlib
-import unittest
 import tempfile
-import json
+import unittest
+from pathlib import Path
 
 import pytest
-
-from mmif import Mmif, Document, AnnotationTypes
+from hypothesis import given
+from hypothesis import strategies as st
+
+from mmif import (
+    AnnotationTypes, 
+    Document, 
+    Mmif
+)
 from mmif.utils import sequence_helper as sqh
 from mmif.utils import text_document_helper as tdh
 from mmif.utils import timeunit_helper as tuh
 from mmif.utils import video_document_helper as vdh
-from tests.mmif_examples import *
-from hypothesis import given, strategies as st
+from mmif.utils import workflow_helper as wfh
+from tests import mmif_examples 
 
 
 class TestTimeunitHelper(unittest.TestCase):
@@ -205,7 +213,7 @@ def test_width_based_smoothing(self):
 
 
 class TestTextDocHelper(unittest.TestCase):
-    mmif_obj = Mmif(MMIF_EXAMPLES['everything'])
+    mmif_obj = Mmif(mmif_examples.MMIF_EXAMPLES['everything'])
 
     @pytest.mark.skip("The only valid test cases come from kaldi app which annotates wrong property")
     def test_slice_text(self):
@@ -232,8 +240,6 @@ def setUp(self) -> None:
 
     def create_temp_mmif_file(self, mmif_obj):
         """Helper to create a temporary MMIF file."""
-        import tempfile
-        import json
         tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.mmif', delete=False)
         if isinstance(mmif_obj, Mmif):
             content_to_write = mmif_obj.serialize(pretty=False)
@@ -244,24 +250,20 @@ def create_temp_mmif_file(self, mmif_obj):
         return tmp.name
 
     def test_split_appname_appversion(self):
-        from mmif.utils.workflow_helper import _split_appname_appversion
-        app_name, app_version = _split_appname_appversion("http://apps.clams.ai/test-app/v1.0.0")
+        app_name, app_version = wfh._split_appname_appversion("http://apps.clams.ai/test-app/v1.0.0")
         self.assertEqual(app_name, "test-app")
         self.assertEqual(app_version, "v1.0.0")
 
     def test_generate_param_hash(self):
-        from mmif.utils.workflow_helper import generate_param_hash
         params = {"param1": "value1", "param2": 42}
-        hash1 = generate_param_hash(params)
-        hash2 = generate_param_hash(params)
+        hash1 = wfh.generate_param_hash(params)
+        hash2 = wfh.generate_param_hash(params)
         self.assertEqual(hash1, hash2)
         params_reversed = {"param2": 42, "param1": "value1"}
-        hash3 = generate_param_hash(params_reversed)
+        hash3 = wfh.generate_param_hash(params_reversed)
         self.assertEqual(hash1, hash3)
 
     def test_generate_workflow_identifier_grouped(self):
-        from mmif.vocabulary import AnnotationTypes
-        from mmif.utils import workflow_helper
         view1 = self.basic_mmif.new_view()
         view1.metadata.app = "http://apps.clams.ai/app1/v1.0.0"
         view1.metadata.timestamp = "2024-01-01T12:00:00Z"
@@ -274,7 +276,7 @@ def test_generate_workflow_identifier_grouped(self):
         tmp_file = self.create_temp_mmif_file(self.basic_mmif)
         import os
         try:
-            workflow_id = workflow_helper.generate_workflow_identifier(tmp_file)
+            workflow_id = wfh.generate_workflow_identifier(tmp_file)
             segments = workflow_id.split('/')
             self.assertEqual(len(segments), 6)
             self.assertIn('app1', segments[0])
@@ -284,39 +286,35 @@ def test_generate_workflow_identifier_grouped(self):
 
     def test_generate_workflow_identifier_with_mmif_object(self):
         """Test that generate_workflow_identifier accepts Mmif objects directly."""
-        from mmif.utils import workflow_helper
         import os
 
         # Test with Mmif object directly
-        workflow_id_from_obj = workflow_helper.generate_workflow_identifier(self.basic_mmif)
+        workflow_id_from_obj = wfh.generate_workflow_identifier(self.basic_mmif)
 
         # Test with file path - should produce the same result
         tmp_file = self.create_temp_mmif_file(self.basic_mmif)
         try:
-            workflow_id_from_file = workflow_helper.generate_workflow_identifier(tmp_file)
+            workflow_id_from_file = wfh.generate_workflow_identifier(tmp_file)
             self.assertEqual(workflow_id_from_obj, workflow_id_from_file)
         finally:
             os.unlink(tmp_file)
 
     def test_read_mmif_from_path(self):
         """Test the _read_mmif_from_path helper function."""
-        from mmif.utils.workflow_helper import _read_mmif_from_path
-        from pathlib import Path
-        import os
 
         # Test with Mmif object - should return as-is
-        result = _read_mmif_from_path(self.basic_mmif)
+        result = wfh._read_mmif_from_path(self.basic_mmif)
         self.assertIs(result, self.basic_mmif)
 
         # Test with file path string
         tmp_file = self.create_temp_mmif_file(self.basic_mmif)
         try:
-            result_from_str = _read_mmif_from_path(tmp_file)
+            result_from_str = wfh._read_mmif_from_path(tmp_file)
             self.assertIsInstance(result_from_str, Mmif)
             self.assertEqual(result_from_str.serialize(pretty=False), self.basic_mmif.serialize(pretty=False))
 
             # Test with Path object
-            result_from_path = _read_mmif_from_path(Path(tmp_file))
+            result_from_path = wfh._read_mmif_from_path(Path(tmp_file))
             self.assertIsInstance(result_from_path, Mmif)
             self.assertEqual(result_from_path.serialize(pretty=False), self.basic_mmif.serialize(pretty=False))
         finally:
@@ -324,27 +322,164 @@ def test_read_mmif_from_path(self):
 
         # Test with invalid input
         with pytest.raises(ValueError):
-            _read_mmif_from_path(12345)
+            wfh._read_mmif_from_path(12345)
 
     def test_describe_single_mmif_with_mmif_object(self):
         """Test that describe_single_mmif accepts Mmif objects directly."""
-        from mmif.utils.workflow_helper import describe_single_mmif
         import os
 
         # Test with Mmif object directly
-        result_from_obj = describe_single_mmif(self.basic_mmif)
+        result_from_obj = wfh.describe_single_mmif(self.basic_mmif)
 
         # Test with file path - should produce the same result
         tmp_file = self.create_temp_mmif_file(self.basic_mmif)
         try:
-            result_from_file = describe_single_mmif(tmp_file)
+            result_from_file = wfh.describe_single_mmif(tmp_file)
             self.assertEqual(result_from_obj, result_from_file)
-            self.assertIn('workflowId', result_from_obj)
-            self.assertIn('stats', result_from_obj)
-            self.assertIn('apps', result_from_obj)
+            
+            # Validate that the output conforms to the SingleMmifDesc Pydantic model
+            # If validation succeeds, all required fields with correct aliases are present
+            validated = wfh.SingleMmifDesc.model_validate(result_from_obj)
+            # Can assert on the validated object's attributes if needed
+            self.assertIsNotNone(validated.workflow_id)
+            self.assertIsNotNone(validated.stats)
+            self.assertIsNotNone(validated.apps)
+        finally:
+            os.unlink(tmp_file)
+
+    def test_describe_single_mmif_empty(self):
+        """Test describe_single_mmif with an empty MMIF (no views)."""
+        tmp_file = self.create_temp_mmif_file(self.basic_mmif)
+        try:
+            result = wfh.describe_single_mmif(tmp_file)
+            # Validate against Pydantic model
+            validated = wfh.SingleMmifDesc.model_validate(result)
+            self.assertEqual(validated.stats.app_count, 0)
+            self.assertEqual(len(validated.apps), 0)
+            self.assertEqual(validated.stats.annotation_count_by_type, {})
+        finally:
+            os.unlink(tmp_file)
+
+    def test_describe_single_mmif_one_app(self):
+        """Test describe_single_mmif with a single app execution."""
+        view = self.basic_mmif.new_view()
+        view.metadata.app = "http://apps.clams.ai/test-app/v1.0.0"
+        view.metadata.timestamp = "2024-01-01T12:00:00Z"
+        view.metadata.appProfiling = {"runningTime": "0:00:01.234"}
+        view.new_annotation(AnnotationTypes.TimeFrame)
+        tmp_file = self.create_temp_mmif_file(self.basic_mmif)
+        try:
+            result = wfh.describe_single_mmif(tmp_file)
+            # Validate against Pydantic model
+            validated = wfh.SingleMmifDesc.model_validate(result)
+            self.assertEqual(validated.stats.app_count, 1)
+            self.assertEqual(len(validated.apps), 1)
+            app_exec = validated.apps[0]
+            self.assertEqual(app_exec.app, view.metadata.app)
+            self.assertEqual(app_exec.view_ids, [view.id])
+            self.assertEqual(app_exec.app_profiling.running_time_ms, 1234)
+        finally:
+            os.unlink(tmp_file)
+
+    def test_describe_single_mmif_one_app_two_views(self):
+        """Test describe_single_mmif with one app execution producing two views."""
+        view1 = self.basic_mmif.new_view()
+        view1.metadata.app = "http://apps.clams.ai/test-app/v1.0.0"
+        view1.metadata.timestamp = "2024-01-01T12:00:00Z"
+        view1.new_annotation(AnnotationTypes.TimeFrame)
+        view2 = self.basic_mmif.new_view()
+        view2.metadata.app = "http://apps.clams.ai/test-app/v1.0.0"
+        view2.metadata.timestamp = "2024-01-01T12:00:00Z"
+        view2.new_annotation(AnnotationTypes.TimeFrame)
+        tmp_file = self.create_temp_mmif_file(self.basic_mmif)
+        try:
+            result = wfh.describe_single_mmif(tmp_file)
+            # Validate against Pydantic model
+            validated = wfh.SingleMmifDesc.model_validate(result)
+            self.assertEqual(validated.stats.app_count, 1)
+            self.assertEqual(len(validated.apps), 1)
+            app_exec = validated.apps[0]
+            self.assertEqual(app_exec.view_ids, [view1.id, view2.id])
+        finally:
+            os.unlink(tmp_file)
+
+    def test_describe_single_mmif_error_view(self):
+        """Test describe_single_mmif with a view containing an error."""
+        view = self.basic_mmif.new_view()
+        view.metadata.app = "http://apps.clams.ai/test-app/v1.0.0"
+        view.metadata.timestamp = "2024-01-01T12:00:00Z"
+        view.metadata.error = {"message": "Something went wrong"}
+        tmp_file = self.create_temp_mmif_file(self.basic_mmif)
+        try:
+            result = wfh.describe_single_mmif(tmp_file)
+            # Validate against Pydantic model
+            validated = wfh.SingleMmifDesc.model_validate(result)
+            self.assertEqual(validated.stats.app_count, 0)
+            self.assertEqual(len(validated.apps), 0)
+            self.assertEqual(len(validated.stats.error_views), 1)
         finally:
             os.unlink(tmp_file)
 
+    def test_describe_single_mmif_with_unassigned_views(self):
+        """Test describe_single_mmif with views that cannot be grouped."""
+        import unittest.mock
+        raw_mmif = json.loads(self.basic_mmif.serialize())
+        raw_mmif['views'].append({'id': 'v1', 'metadata': {'app': 'http://apps.clams.ai/app1/v1.0.0', 'timestamp': '2024-01-01T12:00:00Z'}, 'annotations': []})  # v1: has both app and timestamp
+        raw_mmif['views'].append({'id': 'v2', 'metadata': {'app': 'http://apps.clams.ai/app2/v2.0.0'}, 'annotations': []})  # v2: has no timestamp
+        raw_mmif['views'].append({'id': 'v3', 'metadata': {'timestamp': '2024-01-01T12:01:00Z', 'app': ''}, 'annotations': []})  # v3: has an empty app
+        tmp_file = self.create_temp_mmif_file(raw_mmif)
+        try:
+            with unittest.mock.patch('jsonschema.validators.validate'):  # schema validation is replaced by a mock while describing
+                result = wfh.describe_single_mmif(tmp_file)
+            # Validate against Pydantic model
+            validated = wfh.SingleMmifDesc.model_validate(result)
+            self.assertEqual(validated.stats.app_count, 1)
+            self.assertEqual(len(validated.apps), 2)
+            special_entry = validated.apps[-1]  # the last entry is expected to collect v2 and v3
+            self.assertEqual(special_entry.app, 'http://apps.clams.ai/non-existing-app/v1')
+            self.assertEqual(len(special_entry.view_ids), 2)
+            self.assertIn('v2', special_entry.view_ids)
+            self.assertIn('v3', special_entry.view_ids)
+        finally:
+            os.unlink(tmp_file)
+
+    def test_describe_collection_empty(self):
+        """Test describe_mmif_collection with an empty directory."""
+        # Use a unique temporary directory rather than a fixed name in the
+        # current working directory, so the test cannot collide with (or
+        # leave behind) files from other runs.
+        with tempfile.TemporaryDirectory() as dummy_dir:
+            output = wfh.describe_mmif_collection(dummy_dir)
+            # Validate using Pydantic model
+            validated = wfh.CollectionMmifDesc.model_validate(output)
+            # An empty collection reports zero MMIFs and no workflows.
+            self.assertEqual(validated.mmif_count_by_status.total, 0)
+            self.assertEqual(len(validated.workflows), 0)
+
+    def test_describe_collection_with_files(self):
+        """Test describe_mmif_collection with MMIF files."""
+        # Use a unique temporary directory rather than a fixed name in the
+        # current working directory: it cannot collide with leftovers from
+        # an earlier run, and it is removed automatically on exit even when
+        # an assertion fails.
+        with tempfile.TemporaryDirectory() as dummy_dir:
+            # Create two MMIF files in the directory
+            # (both are copies of self.basic_mmif)
+            for i in range(2):
+                tmp_file = os.path.join(dummy_dir, f'{i}.mmif')
+                with open(tmp_file, 'w') as f:
+                    f.write(self.basic_mmif.serialize())
+
+            output = wfh.describe_mmif_collection(dummy_dir)
+
+            # Validate structure using Pydantic model
+            # If validation succeeds, all required fields with correct aliases are present
+            validated = wfh.CollectionMmifDesc.model_validate(output)
+
+            # Verify counts using validated object attributes
+            self.assertEqual(validated.mmif_count_by_status.total, 2)
+            self.assertIsInstance(validated.workflows, list)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/test_utils_cli.py b/tests/test_utils_cli.py
index 10270525..66c77c38 100644
--- a/tests/test_utils_cli.py
+++ b/tests/test_utils_cli.py
@@ -1,3 +1,4 @@
+import argparse
 import contextlib
 import io
 import json
@@ -6,18 +7,121 @@
 import unittest.mock
 
 import mmif
-from mmif.utils.cli import rewind
-from mmif.utils.cli import source
-from mmif.utils.cli import describe
-from mmif.utils.cli import summarize
-
 from mmif.serialize import Mmif
-from mmif.vocabulary import DocumentTypes, AnnotationTypes
-
+from mmif.utils.cli import describe, rewind, source, summarize
+from mmif.vocabulary import AnnotationTypes
 
 BASIC_MMIF_STRING = '{"metadata": {"mmif": "http://mmif.clams.ai/1.0.0"}, "documents": [{"@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1", "properties": {"id": "d1", "mime": "video/mp4", "location": "file:///test/video.mp4"}}], "views": []}'
 
 
+class BaseCliTestCase(unittest.TestCase):
+    """Base class for CLI module tests with common utilities."""
+    
+    cli_module = None  # Override in subclass
+    
+    def setUp(self):
+        """Set up common test fixtures."""
+        if self.cli_module:
+            self.parser = self.cli_module.prep_argparser()
+        self.basic_mmif = Mmif(BASIC_MMIF_STRING)
+        self.maxDiff = None
+    
+    @staticmethod
+    def create_temp_mmif_file(mmif_obj):
+        """Create a temporary MMIF file for testing.
+        
+        Args:
+            mmif_obj: Either a Mmif object or a dict/string to serialize
+            
+        Returns:
+            str: Path to the temporary file (caller must unlink)
+        """
+        tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.mmif', delete=False)
+        if isinstance(mmif_obj, Mmif):
+            content = mmif_obj.serialize(pretty=False)
+        else:
+            content = json.dumps(mmif_obj) if isinstance(mmif_obj, dict) else mmif_obj
+        tmp.write(content)
+        tmp.close()
+        return tmp.name
+    
+    def run_cli_capture_stdout(self, args_namespace):
+        """Run CLI module and capture stdout as parsed JSON.
+        
+        Args:
+            args_namespace: Namespace object with CLI arguments
+            
+        Returns:
+            dict: Parsed JSON output from stdout
+        """
+        with unittest.mock.patch('sys.stdout', new=io.StringIO()) as stdout:
+            self.cli_module.main(args_namespace)
+            return json.loads(stdout.getvalue())
+
+
+class IOTestMixin:
+    """Mixin providing common I/O tests for CLI modules.
+    
+    Requires the test class to have:
+    - cli_module attribute
+    - basic_mmif attribute
+    - create_temp_mmif_file method
+    - run_cli_capture_stdout method
+    - expected_output_keys attribute (list of keys to check in output)
+    """
+    
+    def test_file_input_stdout_output(self):
+        """Test reading from file and outputting to stdout."""
+        tmp_file = self.create_temp_mmif_file(self.basic_mmif)
+        try:
+            args = argparse.Namespace(
+                MMIF_FILE=tmp_file,
+                output=None,
+                pretty=False,
+                help_schemas=None  # For describe module
+            )
+            output = self.run_cli_capture_stdout(args)
+            self.assertIsInstance(output, dict)
+            for key in self.expected_output_keys:
+                self.assertIn(key, output)
+        finally:
+            os.unlink(tmp_file)
+    
+    def test_file_input_file_output(self):
+        """Test reading from file and outputting to file."""
+        tmp_input = self.create_temp_mmif_file(self.basic_mmif)
+        tmp_output = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
+        tmp_output.close()
+        try:
+            args = self.parser.parse_args([tmp_input, '-o', tmp_output.name])
+            self.cli_module.main(args)
+            with open(tmp_output.name, 'r') as f:
+                output = json.load(f)
+            self.assertIsInstance(output, dict)
+            for key in self.expected_output_keys:
+                self.assertIn(key, output)
+        finally:
+            os.unlink(tmp_input)
+            os.unlink(tmp_output.name)
+    
+    def test_stdin_input_stdout_output(self):
+        """Test reading from stdin and outputting to stdout."""
+        mmif_str = self.basic_mmif.serialize()
+        with unittest.mock.patch('sys.stdin', io.StringIO(mmif_str)), \
+             unittest.mock.patch('sys.stdout', new=io.StringIO()) as stdout:
+            args = argparse.Namespace(
+                MMIF_FILE=None,
+                output=None,
+                pretty=False,
+                help_schemas=None  # For describe module
+            )
+            self.cli_module.main(args)
+            output = json.loads(stdout.getvalue())
+            self.assertIsInstance(output, dict)
+            for key in self.expected_output_keys:
+                self.assertIn(key, output)
+
+
 class TestCli(unittest.TestCase):
     def setUp(self) -> None:
         self.parser, _, _ = mmif.prep_argparser_and_subcmds()
@@ -179,178 +283,72 @@ def test_app_rewind(self):
         self.assertIn('dummy_app_two', remaining_apps)
 
 
-class TestDescribe(unittest.TestCase):
+class TestDescribe(BaseCliTestCase, IOTestMixin):
     """Test suite for the describe CLI module."""
-
-    def setUp(self):
-        """Create test MMIF structures."""
-        self.parser = describe.prep_argparser()
-        self.maxDiff = None
-        self.basic_mmif = Mmif(BASIC_MMIF_STRING)
-
-    def create_temp_mmif_file(self, mmif_obj):
-        """Helper to create a temporary MMIF file."""
-        tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.mmif', delete=False)
-        if isinstance(mmif_obj, Mmif):
-            content_to_write = mmif_obj.serialize(pretty=False)
-        else:
-            content_to_write = json.dumps(mmif_obj)
-        tmp.write(content_to_write)
-        tmp.close()
-        return tmp.name
-
-    def test_describe_single_mmif_empty(self):
-        tmp_file = self.create_temp_mmif_file(self.basic_mmif)
-        try:
-            result = mmif.utils.workflow_helper.describe_single_mmif(tmp_file)
-            self.assertEqual(result["stats"]["appCount"], 0)
-            self.assertEqual(len(result["apps"]), 0)
-            self.assertEqual(result["stats"]["annotationCountByType"], {})
-        finally:
-            os.unlink(tmp_file)
-
-    def test_describe_single_mmif_one_app(self):
-        view = self.basic_mmif.new_view()
-        view.metadata.app = "http://apps.clams.ai/test-app/v1.0.0"
-        view.metadata.timestamp = "2024-01-01T12:00:00Z"
-        view.metadata.appProfiling = {"runningTime": "0:00:01.234"}
-        view.new_annotation(AnnotationTypes.TimeFrame)
-        tmp_file = self.create_temp_mmif_file(self.basic_mmif)
-        try:
-            result = mmif.utils.workflow_helper.describe_single_mmif(tmp_file)
-            self.assertEqual(result["stats"]["appCount"], 1)
-            self.assertEqual(len(result["apps"]), 1)
-            app_exec = result["apps"][0]
-            self.assertEqual(app_exec["app"], view.metadata.app)
-            self.assertEqual(app_exec["viewIds"], [view.id])
-            self.assertEqual(app_exec["appProfiling"]["runningTimeMS"], 1234)
-        finally:
-            os.unlink(tmp_file)
-
-    def test_describe_single_mmif_one_app_two_views(self):
-        view1 = self.basic_mmif.new_view()
-        view1.metadata.app = "http://apps.clams.ai/test-app/v1.0.0"
-        view1.metadata.timestamp = "2024-01-01T12:00:00Z"
-        view1.new_annotation(AnnotationTypes.TimeFrame)
-        view2 = self.basic_mmif.new_view()
-        view2.metadata.app = "http://apps.clams.ai/test-app/v1.0.0"
-        view2.metadata.timestamp = "2024-01-01T12:00:00Z"
-        view2.new_annotation(AnnotationTypes.TimeFrame)
-        tmp_file = self.create_temp_mmif_file(self.basic_mmif)
-        try:
-            result = mmif.utils.workflow_helper.describe_single_mmif(tmp_file)
-            self.assertEqual(result["stats"]["appCount"], 1)
-            self.assertEqual(len(result["apps"]), 1)
-            app_exec = result["apps"][0]
-            self.assertEqual(app_exec["viewIds"], [view1.id, view2.id])
-        finally:
-            os.unlink(tmp_file)
-
-    def test_describe_single_mmif_error_view(self):
-        view = self.basic_mmif.new_view()
-        view.metadata.app = "http://apps.clams.ai/test-app/v1.0.0"
-        view.metadata.timestamp = "2024-01-01T12:00:00Z"
-        view.metadata.error = {"message": "Something went wrong"}
+    
+    cli_module = describe
+    expected_output_keys = ['workflowId', 'stats', 'apps']
+
+    def test_help_schemas_all(self):
+        """Test --help-schemas all"""
+        from mmif.utils.cli.describe import models_to_help
+        with unittest.mock.patch('sys.stdout', new=io.StringIO()) as stdout:
+            args = argparse.Namespace(help_schemas=['all'], MMIF_FILE=None, output=None, pretty=False)
+            with self.assertRaises(SystemExit) as cm:
+                describe.main(args)
+            self.assertEqual(cm.exception.code, 0)
+            output = stdout.getvalue()
+            for m in models_to_help:
+                self.assertIn(m.__name__, output)
+            self.assertIn("$defs", output)
+
+    def test_describe_main_directory(self):
+        """Test describe.main with a directory input"""
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            # Create two mmif files
+            with open(os.path.join(tmp_dir, '1.mmif'), 'w') as f:
+                f.write(self.basic_mmif.serialize())
+            with open(os.path.join(tmp_dir, '2.mmif'), 'w') as f:
+                f.write(self.basic_mmif.serialize())
+            
+            with unittest.mock.patch('sys.stdout', new=io.StringIO()) as stdout:
+                # MMIF_FILE argument expects a string path
+                args = argparse.Namespace(MMIF_FILE=tmp_dir, output=None, pretty=False, help_schemas=None)
+                describe.main(args)
+                output_json = json.loads(stdout.getvalue())
+                # Just verify valid JSON output was produced
+                self.assertIsInstance(output_json, dict)
+                self.assertTrue(len(output_json) > 0)
+
+    def test_deprecated_functions(self):
+        """Test backward compatibility wrapper functions"""
         tmp_file = self.create_temp_mmif_file(self.basic_mmif)
         try:
-            result = mmif.utils.workflow_helper.describe_single_mmif(tmp_file)
-            self.assertEqual(result["stats"]["appCount"], 0)
-            self.assertEqual(len(result["apps"]), 0)
-            self.assertEqual(len(result["stats"]["errorViews"]), 1)
-        finally:
-            os.unlink(tmp_file)
-
-    @unittest.mock.patch('jsonschema.validators.validate')
-    def test_describe_single_mmif_with_unassigned_views(self, mock_validate):
-        raw_mmif = json.loads(self.basic_mmif.serialize())
-        raw_mmif['views'].append({'id': 'v1', 'metadata': {'app': 'http://apps.clams.ai/app1/v1.0.0', 'timestamp': '2024-01-01T12:00:00Z'}, 'annotations': []})
-        raw_mmif['views'].append({'id': 'v2', 'metadata': {'app': 'http://apps.clams.ai/app2/v2.0.0'}, 'annotations': []})
-        raw_mmif['views'].append({'id': 'v3', 'metadata': {'timestamp': '2024-01-01T12:01:00Z', 'app': ''}, 'annotations': []})
-        tmp_file = self.create_temp_mmif_file(raw_mmif)
-        try:
-            result = mmif.utils.workflow_helper.describe_single_mmif(tmp_file)
-            self.assertEqual(result['stats']['appCount'], 1)
-            self.assertEqual(len(result['apps']), 2)
-            special_entry = result['apps'][-1]
-            self.assertEqual(special_entry['app'], 'http://apps.clams.ai/non-existing-app/v1')
-            self.assertEqual(len(special_entry['viewIds']), 2)
-            self.assertIn('v2', special_entry['viewIds'])
-            self.assertIn('v3', special_entry['viewIds'])
+            with self.assertWarns(DeprecationWarning):
+                describe.get_pipeline_specs(tmp_file)
+            with self.assertWarns(DeprecationWarning):
+                describe.generate_pipeline_identifier(tmp_file)
         finally:
             os.unlink(tmp_file)
 
-    def test_describe_collection_empty(self):
-        dummy_dir = 'dummy_mmif_collection'
-        os.makedirs(dummy_dir, exist_ok=True)
-        try:
-            output = mmif.utils.workflow_helper.describe_mmif_collection(dummy_dir)
-            expected = {
-                'mmifCountByStatus': {'total': 0, 'successful': 0, 'withErrors': 0, 'withWarnings': 0, 'invalid': 0},
-                'workflows': [],
-                'annotationCountByType': {}
-            }
-            self.assertEqual(output, expected)
-        finally:
-            os.rmdir(dummy_dir)
-
 
-class TestSummarize(unittest.TestCase):
+class TestSummarize(BaseCliTestCase, IOTestMixin):
     """Test suite for the summarize CLI module."""
+    
+    cli_module = summarize
+    expected_output_keys = ['mmif_version', 'documents', 'views']
 
-    def setUp(self):
-        """Create test MMIF structures."""
-        self.parser = summarize.prep_argparser()
-        self.basic_mmif = Mmif(BASIC_MMIF_STRING)
-
-    def create_temp_mmif_file(self, mmif_obj):
-        """Helper to create a temporary MMIF file."""
-        tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.mmif', delete=False)
-        tmp.write(mmif_obj.serialize(pretty=False))
-        tmp.close()
-        return tmp.name
-
-    def test_summarize_positional_input(self):
+    def test_summarize_validates_content(self):
+        """Test that summarize produces expected content."""
         tmp_file = self.create_temp_mmif_file(self.basic_mmif)
         try:
-            with unittest.mock.patch('sys.stdout', new=io.StringIO()) as stdout:
-                args = self.parser.parse_args([tmp_file])
-                # args.output is None by default, which means stdout in open_cli_io_arg
-                summarize.main(args)
-                output = json.loads(stdout.getvalue())
-                self.assertIn('mmif_version', output)
-                self.assertEqual(output['mmif_version'], "http://mmif.clams.ai/1.0.0")
+            output = self.run_cli_capture_stdout(
+                argparse.Namespace(MMIF_FILE=tmp_file, output=None, pretty=False)
+            )
+            self.assertEqual(output['mmif_version'], "http://mmif.clams.ai/1.0.0")
         finally:
             os.unlink(tmp_file)
 
-    def test_summarize_output_file(self):
-        tmp_input = self.create_temp_mmif_file(self.basic_mmif)
-        tmp_output = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
-        tmp_output.close()
-        try:
-            args = self.parser.parse_args([tmp_input, "-o", tmp_output.name])
-            summarize.main(args)
-            # args.output is a path string now; no file handle to close.
-            with open(tmp_output.name, 'r') as f:
-                output = json.load(f)
-            self.assertIn('mmif_version', output)
-        finally:
-            os.unlink(tmp_input)
-            os.unlink(tmp_output.name)
-
-    def test_summarize_stdin(self):
-        mmif_str = self.basic_mmif.serialize()
-        import argparse
-        
-        with unittest.mock.patch('sys.stdin', io.StringIO(mmif_str)), \
-             unittest.mock.patch('sys.stdout', new=io.StringIO()) as stdout:
-            # MMIF_FILE defaults to None -> stdin
-            # output defaults to None -> stdout
-            args = argparse.Namespace(MMIF_FILE=None, output=None, pretty=False)
-            summarize.main(args)
-
-            output = json.loads(stdout.getvalue())
-            self.assertEqual(output['mmif_version'], "http://mmif.clams.ai/1.0.0")
-
 
 if __name__ == '__main__':
     unittest.main()

From 9ee0bd5ac5ecf7fee0b80e5e372b7fec8708dd00 Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Mon, 16 Feb 2026 05:32:33 -0500
Subject: [PATCH 3/3] added human-friendly summary for pydantic classes in
 `describe --help`

---
 mmif/utils/cli/__init__.py | 136 +++++++++++++++++++++++++++++++------
 mmif/utils/cli/describe.py |  52 +++++++-------
 tests/test_utils_cli.py    |  34 +++++++---
 3 files changed, 162 insertions(+), 60 deletions(-)

diff --git a/mmif/utils/cli/__init__.py b/mmif/utils/cli/__init__.py
index 935ab0a7..f24248f2 100644
--- a/mmif/utils/cli/__init__.py
+++ b/mmif/utils/cli/__init__.py
@@ -6,16 +6,19 @@
 import io
 import os
 import sys
-from typing import Iterator, Optional, TextIO, cast
+from typing import Iterator, Optional, TextIO, Type, Union, cast, get_args, get_origin
+
+from pydantic import BaseModel
 
 
 @contextlib.contextmanager
-def open_cli_io_arg(path_or_dash: Optional[str],
-                    mode: str = 'r',
-                    encoding: Optional[str] = None,
-                    errors: Optional[str] = None,
-                    default_stdin: bool = False,
-                    ) -> Iterator[TextIO]:
+def open_cli_io_arg(
+    path_or_dash: Optional[str],
+    mode: str = "r",
+    encoding: Optional[str] = None,
+    errors: Optional[str] = None,
+    default_stdin: bool = False,
+) -> Iterator[TextIO]:
     """
     Context manager for opening files with stdin/stdout support.
 
@@ -55,10 +58,10 @@ def open_cli_io_arg(path_or_dash: Optional[str],
             f.write(content)
     """
     # Valid text modes for file operations
-    _READ_FLAGS = frozenset({'r', '+'})
-    _WRITE_FLAGS = frozenset({'w', 'a', 'x', '+'})
+    _READ_FLAGS = frozenset({"r", "+"})
+    _WRITE_FLAGS = frozenset({"w", "a", "x", "+"})
 
-    if 'b' in mode:
+    if "b" in mode:
         raise ValueError(
             f"Binary mode '{mode}' is not supported. "
             "Use text modes ('r', 'w', 'a', 'x') instead."
@@ -67,9 +70,7 @@ def open_cli_io_arg(path_or_dash: Optional[str],
     needs_read = bool(set(mode) & _READ_FLAGS)
     needs_write = bool(set(mode) & _WRITE_FLAGS)
 
-    should_use_stdio = path_or_dash == '-' or (
-        path_or_dash is None and default_stdin
-    )
+    should_use_stdio = path_or_dash == "-" or (path_or_dash is None and default_stdin)
 
     file_handle: Optional[TextIO] = None
     should_close = False
@@ -84,11 +85,7 @@ def open_cli_io_arg(path_or_dash: Optional[str],
 
             if needs_read:
                 # Check for missing input when stdin is a terminal
-                if (
-                    path_or_dash is None
-                    and default_stdin
-                    and sys.stdin.isatty()
-                ):
+                if path_or_dash is None and default_stdin and sys.stdin.isatty():
                     raise SystemExit("error: No input provided.")
                 file_handle = sys.stdin
 
@@ -97,14 +94,15 @@ def open_cli_io_arg(path_or_dash: Optional[str],
 
             else:
                 raise ValueError(
-                    f"Mode '{mode}' not supported with stdin/stdout "
-                    "(use 'r' or 'w')"
+                    f"Mode '{mode}' not supported with stdin/stdout (use 'r' or 'w')"
                 )
 
         elif isinstance(path_or_dash, str):
             if needs_read and not os.path.exists(path_or_dash):
                 raise FileNotFoundError(f"Input path does not exist: {path_or_dash}")
-            file_handle = cast(TextIO, io.open(path_or_dash, mode, encoding=encoding, errors=errors))
+            file_handle = cast(
+                TextIO, io.open(path_or_dash, mode, encoding=encoding, errors=errors)
+            )
             should_close = True
 
         elif path_or_dash is None:
@@ -126,6 +124,102 @@ def open_cli_io_arg(path_or_dash: Optional[str],
             file_handle.close()
 
 
+def generate_model_summary(model: Type[BaseModel], indent: int = 0) -> str:
+    """
+    Build an indented, human-readable outline of a pydantic model's fields.
+
+    Each field yields one ``- name (type): description`` line, preferring the
+    field alias over the attribute name. A field holding a model, or a list
+    or dict of models, is followed by that model's outline indented by four
+    more spaces.
+
+    :param model: pydantic model class to summarize
+    :param indent: number of spaces put in front of every top-level line
+    :return: the outline as a single newline-joined string
+    """
+    return "\n".join(_summary_lines(model, indent, (model,)))
+
+
+def _summary_lines(model, indent, ancestors):
+    """List the outline lines of *model*; *ancestors* are models being expanded."""
+    lines = []
+    prefix = " " * indent
+    for name, field in model.model_fields.items():
+        entry = f"{prefix}- {field.alias or name} ({_format_type(field.annotation)})"
+        if field.description:
+            entry += f": {field.description}"
+        lines.append(entry)
+        nested = _nested_model(field.annotation)
+        # A model that is already being expanded is not expanded again;
+        # otherwise a self-referencing schema would recurse without end.
+        if nested is not None and nested not in ancestors:
+            lines.extend(_summary_lines(nested, indent + 4, ancestors + (nested,)))
+    return lines
+
+
+def _is_model(candidate) -> bool:
+    """Tell whether *candidate* is a pydantic model class."""
+    try:
+        return isinstance(candidate, type) and issubclass(candidate, BaseModel)
+    except TypeError:
+        # some generic aliases pass the isinstance check yet are not classes
+        return False
+
+
+def _unwrap_optional(annotation):
+    """
+    Split ``Optional[X]`` into ``(X, True)``; anything else is ``(annotation, False)``.
+
+    Both ``typing.Union[X, None]`` and the PEP 604 spelling ``X | None`` are
+    recognized. Unions with several non-None members are returned unchanged.
+    """
+    import types  # local: ``types.UnionType`` is only available on Python 3.10+
+
+    origin = get_origin(annotation)
+    if origin is Union or origin is getattr(types, "UnionType", Union):
+        members = get_args(annotation)
+        non_none = [arg for arg in members if arg is not type(None)]
+        if len(non_none) == 1 and type(None) in members:
+            return non_none[0], True
+    return annotation, False
+
+
+def _nested_model(annotation):
+    """Return the model class to outline beneath a field, or None if there is none."""
+    inner, _ = _unwrap_optional(annotation)
+    if _is_model(inner):
+        return inner
+    origin, args = get_origin(inner), get_args(inner)
+    if origin is list and args and _is_model(args[0]):
+        return args[0]
+    if origin is dict and len(args) > 1 and _is_model(args[1]):
+        return args[1]
+    return None
+
+
+def _format_type(annotation) -> str:
+    """Render a field annotation as a short type label for the outline."""
+    inner, is_optional = _unwrap_optional(annotation)
+    if is_optional:
+        return f"{_format_type(inner)}, optional"
+
+    origin = get_origin(annotation)
+    if origin is list:
+        args = get_args(annotation)
+        return f"[{_format_type(args[0])}]" if args else "[]"
+    # dicts and nested models both appear as JSON objects in the output
+    if origin is dict or _is_model(annotation):
+        return "obj"
+
+    # Otherwise trim the repr: drop "<class '...'>", "typing." and module paths.
+    label = str(annotation)
+    if label.startswith("<class '"):
+        label = label[8:-2]
+    if label.startswith("typing."):
+        label = label[7:]
+    return label.split(".")[-1]
+
+
 # keep imports of CLI modules for historical reasons
 # keep them here in the bottom to avoid circular imports
 from mmif.utils.cli import rewind
diff --git a/mmif/utils/cli/describe.py b/mmif/utils/cli/describe.py
index bb226a81..b8c79ced 100644
--- a/mmif/utils/cli/describe.py
+++ b/mmif/utils/cli/describe.py
@@ -3,11 +3,9 @@
 import sys
 import textwrap
 from pathlib import Path
-from typing import Dict, Type, Union, cast
+from typing import Union, cast
 
-from pydantic import BaseModel
-
-from mmif.utils.cli import open_cli_io_arg
+from mmif.utils.cli import open_cli_io_arg, generate_model_summary
 
 # gen_param_hash is imported for backward compatibility
 from mmif.utils.workflow_helper import (
@@ -18,12 +16,6 @@
     generate_workflow_identifier,
 )
 
-models_to_help = [SingleMmifDesc, CollectionMmifDesc]
-model_modules = set(model.__module__ for model in models_to_help)
-def get_all_models() -> Dict[str, Type[BaseModel]]:
-    return {
-        name: cls for name, cls in models_to_help
-    }
 
 def get_pipeline_specs(mmif_file: Union[str, Path]):
     import warnings
@@ -49,7 +41,15 @@ def describe_argparser():
     This command extracts workflow information from a single MMIF file or 
     a directory of MMIF files. The output is serialized as JSON.
     
-    Use `--help-schemas` to inspect the structure of the JSON output.
+    Output Schemas:
+    
+    1. Single MMIF File (mmif-file):
+{generate_model_summary(SingleMmifDesc, indent=4)}
+    
+    2. MMIF Collection (mmif-dir):
+{generate_model_summary(CollectionMmifDesc, indent=4)}
+    
+    Use `--help-schema` to inspect the full JSON schema for a specific output type.
     """)
     return oneliner, additional
 
@@ -79,13 +79,11 @@ def prep_argparser(**kwargs):
         help="Pretty-print JSON output"
     )
     parser.add_argument(
-        "--help-schemas",
-        nargs="*",
-        choices=["all"] + [m.__name__ for m in models_to_help],
+        "--help-schema",
+        nargs=1,
+        choices=["mmif-file", "mmif-dir"],
         metavar="SCHEMA_NAME",
-        help=f"Print the JSON schema for the output. For human-readable documentation, "
-             f"visit https://clams.ai/mmif-python and see the following modules: "
-             f"{', '.join(model_modules)}.\nOptions: all, {', '.join([m.__name__ for m in models_to_help])}."
+        help="Print the JSON schema for the output. Options: mmif-file, mmif-dir."
     )
     return parser
 
@@ -97,19 +95,15 @@ def main(args):
     :func:`describe_single_mmif` (for single file input) or 
     :func:`describe_mmif_collection` (for directory input).
     """
-    if hasattr(args, 'help_schemas') and args.help_schemas is not None:
-        models_map = {m.__name__: m for m in models_to_help}
-        to_show = []
-        if len(args.help_schemas) == 0 or 'all' in args.help_schemas:
-            to_show = [m.__name__ for m in models_to_help]
-        else:
-            to_show = args.help_schemas
+    if hasattr(args, 'help_schema') and args.help_schema is not None:
+        schema_name = args.help_schema[0]
+        if schema_name == 'mmif-file':
+            model_cls = SingleMmifDesc
+        elif schema_name == 'mmif-dir':
+            model_cls = CollectionMmifDesc
         
-        for name in to_show:
-            model_cls = models_map[name]
-            schema = model_cls.model_json_schema()
-            print(json.dumps(schema, indent=2))
-            print()
+        schema = model_cls.model_json_schema()
+        print(json.dumps(schema, indent=2))
         sys.exit(0)
 
     output = {}
diff --git a/tests/test_utils_cli.py b/tests/test_utils_cli.py
index 66c77c38..dd33fec2 100644
--- a/tests/test_utils_cli.py
+++ b/tests/test_utils_cli.py
@@ -78,7 +78,7 @@ def test_file_input_stdout_output(self):
                 MMIF_FILE=tmp_file,
                 output=None,
                 pretty=False,
-                help_schemas=None  # For describe module
+                help_schema=None  # For describe module
             )
             output = self.run_cli_capture_stdout(args)
             self.assertIsInstance(output, dict)
@@ -113,7 +113,7 @@ def test_stdin_input_stdout_output(self):
                 MMIF_FILE=None,
                 output=None,
                 pretty=False,
-                help_schemas=None  # For describe module
+                help_schema=None  # For describe module
             )
             self.cli_module.main(args)
             output = json.loads(stdout.getvalue())
@@ -289,18 +289,32 @@ class TestDescribe(BaseCliTestCase, IOTestMixin):
     cli_module = describe
     expected_output_keys = ['workflowId', 'stats', 'apps']
 
-    def test_help_schemas_all(self):
-        """Test --help-schemas all"""
-        from mmif.utils.cli.describe import models_to_help
+    def test_help_schema(self):
+        """Test --help-schema with different options"""
+        from mmif.utils.workflow_helper import SingleMmifDesc, CollectionMmifDesc
+        
+        # Test mmif-file
+        with unittest.mock.patch('sys.stdout', new=io.StringIO()) as stdout:
+            args = argparse.Namespace(help_schema=['mmif-file'], MMIF_FILE=None, output=None, pretty=False)
+            with self.assertRaises(SystemExit) as cm:
+                describe.main(args)
+            self.assertEqual(cm.exception.code, 0)
+            output = stdout.getvalue()
+            # Verify SingleMmifDesc schema keys are present
+            self.assertIn("workflowId", output)
+            self.assertIn("stats", output)
+            self.assertIn("apps", output)
+
+        # Test mmif-dir
         with unittest.mock.patch('sys.stdout', new=io.StringIO()) as stdout:
-            args = argparse.Namespace(help_schemas=['all'], MMIF_FILE=None, output=None, pretty=False)
+            args = argparse.Namespace(help_schema=['mmif-dir'], MMIF_FILE=None, output=None, pretty=False)
             with self.assertRaises(SystemExit) as cm:
                 describe.main(args)
             self.assertEqual(cm.exception.code, 0)
             output = stdout.getvalue()
-            for m in models_to_help:
-                self.assertIn(m.__name__, output)
-            self.assertIn("$defs", output)
+            # Verify CollectionMmifDesc schema keys are present
+            self.assertIn("mmifCountByStatus", output)
+            self.assertIn("workflows", output)
 
     def test_describe_main_directory(self):
         """Test describe.main with a directory input"""
@@ -313,7 +327,7 @@ def test_describe_main_directory(self):
             
             with unittest.mock.patch('sys.stdout', new=io.StringIO()) as stdout:
                 # MMIF_FILE argument expects a string path
-                args = argparse.Namespace(MMIF_FILE=tmp_dir, output=None, pretty=False, help_schemas=None)
+                args = argparse.Namespace(MMIF_FILE=tmp_dir, output=None, pretty=False, help_schema=None)
                 describe.main(args)
                 output_json = json.loads(stdout.getvalue())
                 # Just verify valid JSON output was produced