From 300f77b9b85cb50f7e53998437e54e64807d79c5 Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Thu, 5 Feb 2026 19:32:46 -0500 Subject: [PATCH 01/11] Rewrites WordNet converter and loader to use enriched single-file JSONL output with supplementary sense and exception files. --- src/glazing/wordnet/converter.py | 259 +++++++++++++++++ src/glazing/wordnet/loader.py | 264 ++++++++---------- src/glazing/wordnet/models.py | 15 + tests/test_wordnet/test_converter.py | 2 +- tests/test_wordnet/test_loader.py | 185 +++++------- tests/test_wordnet/test_morphy.py | 403 ++++----------------------- 6 files changed, 523 insertions(+), 605 deletions(-) diff --git a/src/glazing/wordnet/converter.py b/src/glazing/wordnet/converter.py index f16112b..cdb498d 100644 --- a/src/glazing/wordnet/converter.py +++ b/src/glazing/wordnet/converter.py @@ -315,6 +315,120 @@ def parse_exception_file(self, filepath: Path | str) -> list[ExceptionEntry]: return entries + def parse_verb_framestext(self, filepath: Path | str) -> dict[int, str]: + """Parse verb.Framestext into a mapping of frame number to template string. + + Parameters + ---------- + filepath : Path | str + Path to verb.Framestext file. + + Returns + ------- + dict[int, str] + Mapping from frame number to template string. + """ + filepath = Path(filepath) + if not filepath.exists(): + return {} + + frames: dict[int, str] = {} + + with filepath.open("r", encoding="utf-8") as f: + for line_raw in f: + line = line_raw.strip() + if not line: + continue + + parts = line.split(None, 1) + if len(parts) < 2: + continue + + try: + frame_num = int(parts[0]) + template = parts[1] + frames[frame_num] = template + except ValueError: + continue + + return frames + + def parse_verb_sentences(self, filepath: Path | str) -> dict[int, str]: + """Parse sents.vrb into a mapping of frame number to example sentence. + + Parameters + ---------- + filepath : Path | str + Path to sents.vrb file. + + Returns + ------- + dict[int, str] + Mapping from frame number to example sentence. + """ + filepath = Path(filepath) + if not filepath.exists(): + return {} + + sentences: dict[int, str] = {} + + with filepath.open("r", encoding="utf-8") as f: + for line_raw in f: + line = line_raw.strip() + if not line: + continue + + parts = line.split(None, 1) + if len(parts) < 2: + continue + + try: + sent_num = int(parts[0]) + sentence = parts[1] + sentences[sent_num] = sentence + except ValueError: + continue + + return sentences + + def parse_cntlist(self, filepath: Path | str) -> dict[str, int]: + """Parse cntlist into a mapping of sense key to frequency count. + + Parameters + ---------- + filepath : Path | str + Path to cntlist file. + + Returns + ------- + dict[str, int] + Mapping from sense key to frequency count. 
+ """ + filepath = Path(filepath) + if not filepath.exists(): + return {} + + counts: dict[str, int] = {} + + with filepath.open("r", encoding="utf-8") as f: + for line_raw in f: + line = line_raw.strip() + if not line: + continue + + parts = line.split() + if len(parts) < 2: + continue + + try: + count = int(parts[0]) + sense_key = parts[1] + counts[sense_key] = count + except ValueError: + continue + + return counts + def convert_wordnet_database( self, wordnet_dir: Path | str, output_file: Path | str ) -> dict[str, int]: @@ -363,6 +477,71 @@ def convert_wordnet_database( all_synsets.extend(synsets) counts[f"synsets_{pos_name}"] = len(synsets) + # Parse supplementary files for enrichment + framestext = self.parse_verb_framestext(wordnet_dir / "verb.Framestext") + sents = self.parse_verb_sentences(wordnet_dir / "sents.vrb") + + # Build sense_key → (sense_number, tag_count) map from index.sense + sense_map: dict[str, tuple[int, int]] = {} + sense_index_file = wordnet_dir / "index.sense" + if sense_index_file.exists(): + with sense_index_file.open("r", encoding="utf-8") as f: + for line_raw in f: + line = line_raw.strip() + if not line: + continue + parts = line.split() + if len(parts) != 4: + continue + try: + sk = parts[0] + sense_number = int(parts[2]) + tag_count = int(parts[3]) + sense_map[sk] = (sense_number, tag_count) + except ValueError: + continue + + # Parse cntlist to enhance tag_count data + cntlist = self.parse_cntlist(wordnet_dir / "cntlist") + for sk, count in cntlist.items(): + if sk in sense_map: + sn, _ = sense_map[sk] + sense_map[sk] = (sn, count) + else: + sense_map[sk] = (0, count) + + # ss_type to number mapping for sense key construction + ss_type_num_map: dict[str, int] = { + "n": 1, + "v": 2, + "a": 3, + "r": 4, + "s": 5, + } + + # Enrich synsets with sense data and verb frame templates + for synset in all_synsets: + ss_num = ss_type_num_map.get(synset.ss_type, 1) + + # Enrich words with sense_number and tag_count + for word in synset.words: + lemma_lower = word.lemma.lower() + sense_key = f"{lemma_lower}%{ss_num}:{synset.lex_filenum:02d}:{word.lex_id:02d}::" + if sense_key in sense_map: + sn, tc = sense_map[sense_key] + if sn > 0: + word.sense_number = sn + word.tag_count = tc + + # Enrich verb frames with template and example_sentence + if synset.frames: + for frame in synset.frames: + fn = frame.frame_number + if fn in framestext: + frame.template = framestext[fn] + if fn in sents: + frame.example_sentence = sents[fn] + # Write all synsets to single output file with output_file.open("w", encoding="utf-8") as f: for synset in all_synsets: @@ -372,6 +551,83 @@ def convert_wordnet_database( return counts + def convert_sense_index(self, wordnet_dir: Path | str, output_file: Path | str) -> int: + """Parse index.sense and output Sense objects to JSONL. + + Parameters + ---------- + wordnet_dir : Path | str + Directory containing WordNet database files. + output_file : Path | str + Output JSON Lines file path. + + Returns + ------- + int + Number of sense entries written. + + Raises + ------ + FileNotFoundError + If index.sense file does not exist. 
+ """ + wordnet_dir = Path(wordnet_dir) + output_file = Path(output_file) + + output_file.parent.mkdir(parents=True, exist_ok=True) + + sense_file = wordnet_dir / "index.sense" + senses = self.parse_sense_index(sense_file) + + with output_file.open("w", encoding="utf-8") as f: + for sense in senses: + f.write(f"{sense.model_dump_json()}\n") + + return len(senses) + + def convert_exceptions(self, wordnet_dir: Path | str, output_file: Path | str) -> int: + """Parse *.exc files and output ExceptionEntry objects to JSONL. + + Parameters + ---------- + wordnet_dir : Path | str + Directory containing WordNet database files. + output_file : Path | str + Output JSON Lines file path. + + Returns + ------- + int + Number of exception entries written. + """ + wordnet_dir = Path(wordnet_dir) + output_file = Path(output_file) + + output_file.parent.mkdir(parents=True, exist_ok=True) + + all_entries: list[ExceptionEntry] = [] + + exc_files: list[tuple[str, WordNetPOS]] = [ + ("noun.exc", "n"), + ("verb.exc", "v"), + ("adj.exc", "a"), + ("adv.exc", "r"), + ] + + for exc_name, pos in exc_files: + exc_path = wordnet_dir / exc_name + if exc_path.exists(): + entries = self.parse_exception_file(exc_path) + for entry in entries: + entry.pos = pos + all_entries.extend(entries) + + with output_file.open("w", encoding="utf-8") as f: + for entry in all_entries: + f.write(f"{entry.model_dump_json()}\n") + + return len(all_entries) + def _parse_data_line(self, line: str) -> Synset | None: """Parse a line from WordNet data file. @@ -458,6 +714,9 @@ def _parse_data_line(self, line: str) -> Synset | None: if ss_type == "v" and idx < len(parts): frames = [] + # Skip frame count field + idx += 1 + # Parse frames until no more "+" markers while idx + 2 < len(parts) and parts[idx] == "+": frame_marker = parts[idx] # "+" diff --git a/src/glazing/wordnet/loader.py b/src/glazing/wordnet/loader.py index 82b86fb..625f85b 100644 --- a/src/glazing/wordnet/loader.py +++ b/src/glazing/wordnet/loader.py @@ -32,7 +32,6 @@ import json from collections import defaultdict from pathlib import Path -from typing import cast from pydantic import ValidationError @@ -40,7 +39,6 @@ from glazing.utils.cache import LRUCache from glazing.wordnet.models import ( ExceptionEntry, - IndexEntry, Sense, Synset, ) @@ -62,7 +60,7 @@ class WordNetLoader: Parameters ---------- data_path : Path | str | None, optional - Path to directory containing WordNet JSON Lines files. + Path to the WordNet JSONL file (e.g., wordnet.jsonl). If None, uses default path from environment. lazy : bool, default=False If True, load synsets on demand rather than all at once. @@ -76,8 +74,8 @@ class WordNetLoader: ---------- synsets : dict[SynsetOffset, Synset] All loaded synsets indexed by offset. - lemma_index : dict[str, dict[WordNetPOS, list[IndexEntry]]] - Index from lemmas to their index entries by POS. + lemma_index : dict[str, dict[WordNetPOS, list[SynsetOffset]]] + Index from lemmas to synset offsets by POS. sense_index : dict[SenseKey, Sense] Index from sense keys to sense objects. exceptions : dict[WordNetPOS, dict[str, list[str]]] @@ -120,7 +118,7 @@ def __init__( Parameters ---------- data_path : Path | str | None, optional - Path to directory containing WordNet JSON Lines files. + Path to the WordNet JSONL file (e.g., wordnet.jsonl). If None, uses default path from environment. lazy : bool, default=False If True, load synsets on demand. 
@@ -138,7 +136,7 @@ def __init__( # Core data structures self.synsets: dict[SynsetOffset, Synset] = {} - self.lemma_index: dict[str, dict[WordNetPOS, list[IndexEntry]]] = defaultdict(dict) + self.lemma_index: dict[str, dict[WordNetPOS, list[SynsetOffset]]] = defaultdict(dict) self.sense_index: dict[SenseKey, Sense] = {} self.exceptions: dict[WordNetPOS, dict[str, list[str]]] = {} @@ -148,8 +146,8 @@ def __init__( self.meronym_index: dict[SynsetOffset, list[SynsetOffset]] = defaultdict(list) self.holonym_index: dict[SynsetOffset, list[SynsetOffset]] = defaultdict(list) - # File paths for lazy loading - self._synset_file_index: dict[SynsetOffset, tuple[Path, int]] = {} + # File index for lazy loading (offset -> byte position in file) + self._synset_file_index: dict[SynsetOffset, int] = {} # Cache for lazy loading if lazy: @@ -167,33 +165,32 @@ def __init__( def load(self) -> None: """Load all WordNet data from JSON Lines files. - This method loads synsets, builds indices, loads exceptions, - and constructs relation graphs. If lazy loading is enabled, - it only builds the file index without loading synset data. + This method loads synsets from the primary JSONL file, builds + lemma and relation indices from loaded data, and optionally loads + supplementary sense and exception data. Raises ------ FileNotFoundError - If data directory or required files don't exist. + If the primary JSONL file doesn't exist. ValidationError If JSON data doesn't match expected schema. """ if self._loaded: return - # Load synsets + # Load synsets from single JSONL file if self.lazy: self._build_file_index() else: self._load_all_synsets() - # Load index files - self._load_index_files() + # Build lemma index from loaded synsets + if not self.lazy: + self._build_lemma_index() - # Load sense index + # Load supplementary data if available self._load_sense_index() - - # Load exceptions self._load_exceptions() # Build relation indices @@ -203,45 +200,44 @@ def load(self) -> None: self._loaded = True def _load_all_synsets(self) -> None: - """Load all synsets from JSON Lines files.""" - for pos in ["noun", "verb", "adj", "adv"]: - synset_file = self.data_path / f"data.{pos}.jsonl" - if not synset_file.exists(): - continue - - with synset_file.open(encoding="utf-8") as f: - for line in f: - if not line.strip(): - continue - - try: - data = json.loads(line) - synset = Synset.model_validate(data) - self.synsets[synset.offset] = synset - except (json.JSONDecodeError, ValidationError) as e: - # Log error but continue loading - print(f"Error loading synset: {e}") + """Load all synsets from single JSONL file.""" + if not self.data_path.exists(): + return + + with self.data_path.open(encoding="utf-8") as f: + for line in f: + if not line.strip(): + continue + + try: + data = json.loads(line) + synset = Synset.model_validate(data) + self.synsets[synset.offset] = synset + except (json.JSONDecodeError, ValidationError): + continue def _build_file_index(self) -> None: - """Build index of synset locations for lazy loading.""" - for pos in ["noun", "verb", "adj", "adv"]: - synset_file = self.data_path / f"data.{pos}.jsonl" - if not synset_file.exists(): - continue - - with synset_file.open(encoding="utf-8") as f: - for line_num, line in enumerate(f): - if not line.strip(): - continue - - try: - # Just extract offset without full validation - data = json.loads(line) - offset = data.get("offset") - if offset: - self._synset_file_index[offset] = (synset_file, line_num) - except json.JSONDecodeError: - pass + """Build byte-offset index for 
lazy loading from single JSONL file.""" + if not self.data_path.exists(): + return + + with self.data_path.open(encoding="utf-8") as f: + while True: + byte_pos = f.tell() + line = f.readline() + if not line: + break + + if not line.strip(): + continue + + try: + data = json.loads(line) + offset = data.get("offset") + if offset: + self._synset_file_index[offset] = byte_pos + except json.JSONDecodeError: + pass def _load_synset_lazy(self, offset: SynsetOffset) -> Synset | None: """Load a single synset on demand. @@ -261,61 +257,44 @@ def _load_synset_lazy(self, offset: SynsetOffset) -> Synset | None: # Check cache first if self._cache is not None: - # Create cache key from offset (cache expects strings) cached = self._cache.get(offset) if cached is not None: return cached - # Load from file - file_info = self._synset_file_index.get(offset) - if not file_info: + # Load from file using byte offset + byte_pos = self._synset_file_index.get(offset) + if byte_pos is None: return None - synset_file, line_num = file_info - try: - with synset_file.open(encoding="utf-8") as f: - for i, line in enumerate(f): - if i == line_num: - data = json.loads(line) - synset = Synset.model_validate(data) + with self.data_path.open(encoding="utf-8") as f: + f.seek(byte_pos) + line = f.readline() + data = json.loads(line) + synset = Synset.model_validate(data) - # Cache it - if self._cache is not None: - self._cache.put(offset, synset) + # Cache it + if self._cache is not None: + self._cache.put(offset, synset) - return synset + return synset except (json.JSONDecodeError, ValidationError): return None - return None - - def _load_index_files(self) -> None: - """Load lemma index files.""" - for pos_name, pos_tag in [("noun", "n"), ("verb", "v"), ("adj", "a"), ("adv", "r")]: - index_file = self.data_path / f"index.{pos_name}.jsonl" - if not index_file.exists(): - continue - - with index_file.open(encoding="utf-8") as f: - for line in f: - if not line.strip(): - continue - - try: - data = json.loads(line) - entry = IndexEntry.model_validate(data) - - # Add to lemma index - if pos_tag not in self.lemma_index[entry.lemma]: - self.lemma_index[entry.lemma][cast(WordNetPOS, pos_tag)] = [] - self.lemma_index[entry.lemma][cast(WordNetPOS, pos_tag)].append(entry) - except (json.JSONDecodeError, ValidationError) as e: - print(f"Error loading index entry: {e}") + def _build_lemma_index(self) -> None: + """Build lemma→synset index from loaded synset data.""" + for synset in self.synsets.values(): + pos = synset.ss_type + for word in synset.words: + lemma = word.lemma.lower() + if pos not in self.lemma_index[lemma]: + self.lemma_index[lemma][pos] = [] + if synset.offset not in self.lemma_index[lemma][pos]: + self.lemma_index[lemma][pos].append(synset.offset) def _load_sense_index(self) -> None: - """Load sense index file.""" - sense_file = self.data_path / "index.sense.jsonl" + """Load sense index from supplementary JSONL file.""" + sense_file = self.data_path.parent / "wordnet_senses.jsonl" if not sense_file.exists(): return @@ -328,31 +307,32 @@ def _load_sense_index(self) -> None: data = json.loads(line) sense = Sense.model_validate(data) self.sense_index[sense.sense_key] = sense - except (json.JSONDecodeError, ValidationError) as e: - print(f"Error loading sense: {e}") + except (json.JSONDecodeError, ValidationError): + continue def _load_exceptions(self) -> None: - """Load morphological exception files.""" - for pos_name, pos_tag in [("noun", "n"), ("verb", "v"), ("adj", "a"), ("adv", "r")]: - exc_file = self.data_path / 
f"{pos_name}.exc.jsonl" - if not exc_file.exists(): - continue - - if pos_tag not in self.exceptions: - self.exceptions[cast(WordNetPOS, pos_tag)] = {} - - with exc_file.open(encoding="utf-8") as f: - for line in f: - if not line.strip(): - continue - - try: - data = json.loads(line) - entry = ExceptionEntry.model_validate(data) - pos_exceptions = self.exceptions[cast(WordNetPOS, pos_tag)] - pos_exceptions[entry.inflected_form] = entry.base_forms - except (json.JSONDecodeError, ValidationError) as e: - print(f"Error loading exception: {e}") + """Load morphological exceptions from supplementary JSONL file.""" + exc_file = self.data_path.parent / "wordnet_exceptions.jsonl" + if not exc_file.exists(): + return + + with exc_file.open(encoding="utf-8") as f: + for line in f: + if not line.strip(): + continue + + try: + data = json.loads(line) + entry = ExceptionEntry.model_validate(data) + # Determine POS from the base form by looking up in synsets + # Store under all POS for simplicity (exceptions file doesn't have POS) + pos = data.get("pos") + if pos and pos in ("n", "v", "a", "r", "s"): + if pos not in self.exceptions: + self.exceptions[pos] = {} + self.exceptions[pos][entry.inflected_form] = entry.base_forms + except (json.JSONDecodeError, ValidationError): + continue def _build_relation_indices(self) -> None: """Build relation indices for efficient traversal.""" @@ -371,12 +351,12 @@ def _build_relation_indices(self) -> None: self.hypernym_index[pointer.offset].append(synset.offset) # Meronym/holonym relations - elif pointer.symbol in ["%m", "%s", "%p"]: + elif pointer.symbol in ("%m", "%s", "%p"): if pointer.offset not in self.meronym_index[synset.offset]: self.meronym_index[synset.offset].append(pointer.offset) if synset.offset not in self.holonym_index[pointer.offset]: self.holonym_index[pointer.offset].append(synset.offset) - elif pointer.symbol in ["#m", "#s", "#p"]: + elif pointer.symbol in ("#m", "#s", "#p"): if pointer.offset not in self.holonym_index[synset.offset]: self.holonym_index[synset.offset].append(pointer.offset) if synset.offset not in self.meronym_index[pointer.offset]: @@ -426,23 +406,24 @@ def get_synsets_by_lemma(self, lemma: str, pos: WordNetPOS | None = None) -> lis ... print(synset.gloss) """ synsets: list[Synset] = [] + lemma_lower = lemma.lower() - if lemma not in self.lemma_index: + if lemma_lower not in self.lemma_index: return synsets # Get POS tags to search + pos_tags: list[WordNetPOS] if pos: - pos_tags = [pos] if pos in self.lemma_index[lemma] else [] + pos_tags = [pos] if pos in self.lemma_index[lemma_lower] else [] else: - pos_tags = list(self.lemma_index[lemma].keys()) + pos_tags = list(self.lemma_index[lemma_lower].keys()) - # Collect synsets + # Collect synsets from offset lists for pos_tag in pos_tags: - for entry in self.lemma_index[lemma][pos_tag]: - for offset in entry.synset_offsets: - synset = self.get_synset(offset) - if synset: - synsets.append(synset) + for offset in self.lemma_index[lemma_lower].get(pos_tag, []): + synset = self.get_synset(offset) + if synset: + synsets.append(synset) return synsets @@ -479,7 +460,7 @@ def get_senses_by_lemma(self, lemma: str, pos: WordNetPOS | None = None) -> list Returns ------- list[Sense] - List of senses for the lemma. + List of senses for the lemma, sorted by sense number. 
Examples -------- @@ -489,18 +470,9 @@ def get_senses_by_lemma(self, lemma: str, pos: WordNetPOS | None = None) -> list """ senses = [] - # Get synsets first - synsets = self.get_synsets_by_lemma(lemma, pos) - - # Extract senses from synsets - for synset in synsets: - for word in synset.words: - if word.lemma == lemma: - # Try to find corresponding sense - for _key, sense in self.sense_index.items(): - if sense.lemma == lemma and sense.synset_offset == synset.offset: - senses.append(sense) - break + for sense in self.sense_index.values(): + if sense.lemma == lemma and (pos is None or sense.ss_type == pos): + senses.append(sense) # Sort by sense number (frequency order) senses.sort(key=lambda s: s.sense_number) @@ -611,7 +583,7 @@ def load_wordnet( Parameters ---------- data_path : Path | str - Path to directory containing WordNet JSON Lines files. + Path to the WordNet JSONL file (e.g., wordnet.jsonl). lazy : bool, default=False If True, load synsets on demand. cache_size : int, default=1000 @@ -624,7 +596,7 @@ def load_wordnet( Examples -------- - >>> wn = load_wordnet("data/wordnet") + >>> wn = load_wordnet("data/wordnet.jsonl") >>> dog = wn.get_synsets_by_lemma("dog", "n")[0] >>> print(dog.gloss) """ diff --git a/src/glazing/wordnet/models.py b/src/glazing/wordnet/models.py index bdc58c0..2336d3a 100644 --- a/src/glazing/wordnet/models.py +++ b/src/glazing/wordnet/models.py @@ -66,6 +66,10 @@ class Word(GlazingBaseModel): Word form (lowercase, underscores for spaces). lex_id : LexID Distinguishes same word in synset (0-15). + sense_number : int | None, default=None + Frequency-based sense ordering from index.sense. + tag_count : int, default=0 + Semantic concordance tag count. Examples -------- @@ -78,6 +82,8 @@ class Word(GlazingBaseModel): lemma: str = Field(description="Word form (lowercase, underscores for spaces)") lex_id: LexID = Field(description="Lexical ID distinguishing same word in synset") + sense_number: int | None = Field(default=None, description="Frequency-based sense ordering") + tag_count: int = Field(default=0, ge=0, description="Semantic concordance tag count") @field_validator("lemma") @classmethod @@ -177,6 +183,10 @@ class VerbFrame(GlazingBaseModel): Frame number (1-35). word_indices : list[int] Word indices (0 = all words, or specific indices). + template : str | None, default=None + Natural language frame template (e.g., "Something ----s"). + example_sentence : str | None, default=None + Example sentence with %s placeholder for verb. 
Examples -------- @@ -189,6 +199,10 @@ class VerbFrame(GlazingBaseModel): word_indices: list[int] = Field( default_factory=list, description="Word indices (0 = all words)" ) + template: str | None = Field(default=None, description="Natural language frame template") + example_sentence: str | None = Field( + default=None, description="Example sentence with %s placeholder" + ) @field_validator("word_indices") @classmethod @@ -517,6 +531,7 @@ class ExceptionEntry(GlazingBaseModel): inflected_form: str = Field(description="Inflected/irregular form") base_forms: list[str] = Field(description="Base/lemma forms") + pos: WordNetPOS | None = Field(default=None, description="Part of speech") @field_validator("inflected_form", "base_forms") @classmethod diff --git a/tests/test_wordnet/test_converter.py b/tests/test_wordnet/test_converter.py index a08b4a7..f4bc050 100644 --- a/tests/test_wordnet/test_converter.py +++ b/tests/test_wordnet/test_converter.py @@ -30,7 +30,7 @@ def sample_data_file_content(self): return """ Copyright notice and license text here More license text 00001740 29 v 01 breathe 0 005 $ 00001740 v 0000 @ 00002084 v 0000 ~ 00001740 v 0000 + 00002760 v 0000 ^ 00001740 v 0000 | take in and expel air through lungs -00002084 29 v 02 respire 0 breathe 1 003 $ 00001740 v 0000 @ 00002325 v 0000 ~ 00002760 v 0000 + 01 00 + 02 01 | undergo the biomedical and metabolic processes of respiration by taking up oxygen and producing carbon monoxide +00002084 29 v 02 respire 0 breathe 1 003 $ 00001740 v 0000 @ 00002325 v 0000 ~ 00002760 v 0000 02 + 01 00 + 02 01 | undergo the biomedical and metabolic processes of respiration by taking up oxygen and producing carbon monoxide """ @pytest.fixture diff --git a/tests/test_wordnet/test_loader.py b/tests/test_wordnet/test_loader.py index 0c92c77..63084c0 100644 --- a/tests/test_wordnet/test_loader.py +++ b/tests/test_wordnet/test_loader.py @@ -13,12 +13,12 @@ class TestWordNetLoader: """Test WordNet loader functionality.""" @pytest.fixture - def temp_data_dir(self): - """Create temporary directory with test data.""" + def temp_data_file(self): + """Create temporary directory with test data in single-file format.""" with tempfile.TemporaryDirectory() as tmpdir: data_path = Path(tmpdir) - # Create test synset data + # All synsets go into a single wordnet.jsonl file synsets_data = [ { "offset": "00001740", @@ -45,75 +45,27 @@ def temp_data_dir(self): ], "gloss": "an entity that has physical existence", }, + { + "offset": "00002325", + "lex_filenum": 29, + "lex_filename": "verb.body", + "ss_type": "v", + "words": [{"lemma": "run", "lex_id": 0}, {"lemma": "go", "lex_id": 1}], + "pointers": [], + "frames": [ + {"frame_number": 1, "word_indices": [0]}, + {"frame_number": 2, "word_indices": [0, 1]}, + ], + "gloss": "move fast by using one's feet", + }, ] - # Write noun synsets - with open(data_path / "data.noun.jsonl", "w") as f: + wordnet_file = data_path / "wordnet.jsonl" + with open(wordnet_file, "w") as f: for synset in synsets_data: f.write(json.dumps(synset) + "\n") - # Create test verb synset - verb_synset = { - "offset": "00002325", - "lex_filenum": 29, - "lex_filename": "verb.body", - "ss_type": "v", - "words": [{"lemma": "run", "lex_id": 0}, {"lemma": "go", "lex_id": 1}], - "pointers": [], - "frames": [ - {"frame_number": 1, "word_indices": [0]}, - {"frame_number": 2, "word_indices": [0, 1]}, - ], - "gloss": "move fast by using one's feet", - } - - with open(data_path / "data.verb.jsonl", "w") as f: - f.write(json.dumps(verb_synset) + "\n") - - # Create 
index entries - index_data = [ - { - "lemma": "entity", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 1, - "ptr_symbols": ["~"], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["00001740"], - }, - { - "lemma": "physical_entity", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 1, - "ptr_symbols": ["@"], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["00001930"], - }, - ] - - with open(data_path / "index.noun.jsonl", "w") as f: - for entry in index_data: - f.write(json.dumps(entry) + "\n") - - # Create verb index - verb_index = { - "lemma": "run", - "pos": "v", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["00002325"], - } - - with open(data_path / "index.verb.jsonl", "w") as f: - f.write(json.dumps(verb_index) + "\n") - - # Create sense index + # Create sense index (supplementary file alongside primary) sense_data = [ { "sense_key": "entity%1:03:00::", @@ -137,40 +89,36 @@ def temp_data_dir(self): }, ] - with open(data_path / "index.sense.jsonl", "w") as f: + with open(data_path / "wordnet_senses.jsonl", "w") as f: for sense in sense_data: f.write(json.dumps(sense) + "\n") - # Create exception entries + # Create exception entries (with pos field) exc_data = [ - {"inflected_form": "children", "base_forms": ["child"]}, - {"inflected_form": "geese", "base_forms": ["goose"]}, + {"inflected_form": "children", "base_forms": ["child"], "pos": "n"}, + {"inflected_form": "geese", "base_forms": ["goose"], "pos": "n"}, + {"inflected_form": "ran", "base_forms": ["run"], "pos": "v"}, ] - with open(data_path / "noun.exc.jsonl", "w") as f: + with open(data_path / "wordnet_exceptions.jsonl", "w") as f: for exc in exc_data: f.write(json.dumps(exc) + "\n") - verb_exc = {"inflected_form": "ran", "base_forms": ["run"]} - - with open(data_path / "verb.exc.jsonl", "w") as f: - f.write(json.dumps(verb_exc) + "\n") + yield wordnet_file - yield data_path - - def test_loader_initialization(self, temp_data_dir): + def test_loader_initialization(self, temp_data_file): """Test loader initialization without autoload.""" - loader = WordNetLoader(temp_data_dir, autoload=False) + loader = WordNetLoader(temp_data_file, autoload=False) - assert loader.data_path == temp_data_dir + assert loader.data_path == temp_data_file assert loader.lazy is False assert loader.cache_size == 1000 assert not loader._loaded assert len(loader.synsets) == 0 - def test_load_synsets(self, temp_data_dir): + def test_load_synsets(self, temp_data_file): """Test loading synsets from JSON Lines.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() # Check synsets loaded @@ -186,23 +134,32 @@ def test_load_synsets(self, temp_data_dir): assert entity.words[0].lemma == "entity" assert len(entity.pointers) == 1 - def test_load_index(self, temp_data_dir): - """Test loading index files.""" - loader = WordNetLoader(temp_data_dir) + def test_load_lemma_index(self, temp_data_file): + """Test building lemma index from synset data.""" + loader = WordNetLoader(temp_data_file) loader.load() - # Check lemma index + # Check lemma index built from synset words assert "entity" in loader.lemma_index assert "n" in loader.lemma_index["entity"] assert len(loader.lemma_index["entity"]["n"]) == 1 - entry = loader.lemma_index["entity"]["n"][0] - assert entry.lemma == "entity" - assert entry.synset_offsets == ["00001740"] + # lemma_index values are SynsetOffset strings now + offset = loader.lemma_index["entity"]["n"][0] + assert offset == 
"00001740" + + # Check verb lemmas + assert "run" in loader.lemma_index + assert "v" in loader.lemma_index["run"] + assert loader.lemma_index["run"]["v"][0] == "00002325" + + # "go" should also be indexed + assert "go" in loader.lemma_index + assert "v" in loader.lemma_index["go"] - def test_load_sense_index(self, temp_data_dir): + def test_load_sense_index(self, temp_data_file): """Test loading sense index.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() # Check sense index @@ -212,9 +169,9 @@ def test_load_sense_index(self, temp_data_dir): assert sense.synset_offset == "00001740" assert sense.sense_number == 1 - def test_load_exceptions(self, temp_data_dir): + def test_load_exceptions(self, temp_data_file): """Test loading exception files.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() # Check noun exceptions @@ -227,9 +184,9 @@ def test_load_exceptions(self, temp_data_dir): assert "ran" in loader.exceptions["v"] assert loader.exceptions["v"]["ran"] == ["run"] - def test_build_relation_indices(self, temp_data_dir): + def test_build_relation_indices(self, temp_data_file): """Test building relation indices.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() # Check hypernym index @@ -240,9 +197,9 @@ def test_build_relation_indices(self, temp_data_dir): assert "00001740" in loader.hyponym_index assert "00001930" in loader.hyponym_index["00001740"] - def test_get_synset(self, temp_data_dir): + def test_get_synset(self, temp_data_file): """Test getting synset by offset.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() synset = loader.get_synset("00001740") @@ -254,9 +211,9 @@ def test_get_synset(self, temp_data_dir): synset = loader.get_synset("99999999") assert synset is None - def test_get_synsets_by_lemma(self, temp_data_dir): + def test_get_synsets_by_lemma(self, temp_data_file): """Test getting synsets by lemma.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() # Test noun @@ -277,9 +234,9 @@ def test_get_synsets_by_lemma(self, temp_data_dir): synsets = loader.get_synsets_by_lemma("nonexistent") assert len(synsets) == 0 - def test_get_sense_by_key(self, temp_data_dir): + def test_get_sense_by_key(self, temp_data_file): """Test getting sense by key.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() sense = loader.get_sense_by_key("entity%1:03:00::") @@ -291,9 +248,9 @@ def test_get_sense_by_key(self, temp_data_dir): sense = loader.get_sense_by_key("nonexistent%1:00:00::") assert sense is None - def test_get_senses_by_lemma(self, temp_data_dir): + def test_get_senses_by_lemma(self, temp_data_file): """Test getting senses by lemma.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() senses = loader.get_senses_by_lemma("entity", "n") @@ -304,9 +261,9 @@ def test_get_senses_by_lemma(self, temp_data_dir): assert len(senses) == 1 assert senses[0].sense_key == "run%2:38:00::" - def test_get_hypernyms(self, temp_data_dir): + def test_get_hypernyms(self, temp_data_file): """Test getting hypernyms.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() synset = loader.get_synset("00001930") @@ -314,9 +271,9 @@ def test_get_hypernyms(self, temp_data_dir): assert len(hypernyms) == 1 assert hypernyms[0].offset == "00001740" - 
def test_get_hyponyms(self, temp_data_dir): + def test_get_hyponyms(self, temp_data_file): """Test getting hyponyms.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() synset = loader.get_synset("00001740") @@ -324,9 +281,9 @@ def test_get_hyponyms(self, temp_data_dir): assert len(hyponyms) == 1 assert hyponyms[0].offset == "00001930" - def test_lazy_loading(self, temp_data_dir): + def test_lazy_loading(self, temp_data_file): """Test lazy loading mode.""" - loader = WordNetLoader(temp_data_dir, lazy=True, cache_size=2) + loader = WordNetLoader(temp_data_file, lazy=True, cache_size=2) loader.load() # Synsets should not be loaded yet @@ -346,9 +303,9 @@ def test_lazy_loading(self, temp_data_dir): assert cached is not None assert cached.offset == "00001740" - def test_get_exceptions(self, temp_data_dir): + def test_get_exceptions(self, temp_data_file): """Test getting morphological exceptions.""" - loader = WordNetLoader(temp_data_dir) + loader = WordNetLoader(temp_data_file) loader.load() noun_exc = loader.get_exceptions("n") @@ -363,9 +320,9 @@ def test_get_exceptions(self, temp_data_dir): adv_exc = loader.get_exceptions("r") assert len(adv_exc) == 0 - def test_load_wordnet_function(self, temp_data_dir): + def test_load_wordnet_function(self, temp_data_file): """Test the convenience load_wordnet function.""" - wn = load_wordnet(temp_data_dir) + wn = load_wordnet(temp_data_file) assert isinstance(wn, WordNetLoader) assert wn._loaded is True diff --git a/tests/test_wordnet/test_morphy.py b/tests/test_wordnet/test_morphy.py index 97efaa8..f9bfa33 100644 --- a/tests/test_wordnet/test_morphy.py +++ b/tests/test_wordnet/test_morphy.py @@ -10,6 +10,26 @@ from glazing.wordnet.morphy import Morphy, morphy +def _write_wordnet_files( + data_path: Path, synsets: list[dict], exceptions: list[dict] | None = None +) -> Path: + """Helper to write synsets and exceptions in the single-file format. + + Returns the path to the primary wordnet.jsonl file. 
+ """ + wordnet_file = data_path / "wordnet.jsonl" + with open(wordnet_file, "w") as f: + for synset in synsets: + f.write(json.dumps(synset) + "\n") + + if exceptions: + with open(data_path / "wordnet_exceptions.jsonl", "w") as f: + for exc in exceptions: + f.write(json.dumps(exc) + "\n") + + return wordnet_file + + class TestMorphy: """Test WordNet morphological processing.""" @@ -19,8 +39,9 @@ def temp_data_with_lemmas(self): with tempfile.TemporaryDirectory() as tmpdir: data_path = Path(tmpdir) - # Create noun synsets with various lemmas - noun_synsets = [ + # All synsets in a single file + all_synsets = [ + # Noun synsets { "offset": "02084442", "lex_filenum": 5, @@ -60,14 +81,7 @@ def temp_data_with_lemmas(self): "pointers": [], "gloss": "two-winged insects", }, - ] - - with open(data_path / "data.noun.jsonl", "w") as f: - for synset in noun_synsets: - f.write(json.dumps(synset) + "\n") - - # Create verb synsets - verb_synsets = [ + # Verb synsets { "offset": "01926311", "lex_filenum": 38, @@ -108,14 +122,7 @@ def temp_data_with_lemmas(self): "frames": [], "gloss": "look attentively", }, - ] - - with open(data_path / "data.verb.jsonl", "w") as f: - for synset in verb_synsets: - f.write(json.dumps(synset) + "\n") - - # Create adjective synsets - adj_synsets = [ + # Adjective synsets { "offset": "00001740", "lex_filenum": 0, @@ -145,201 +152,33 @@ def temp_data_with_lemmas(self): }, ] - with open(data_path / "data.adj.jsonl", "w") as f: - for synset in adj_synsets: - f.write(json.dumps(synset) + "\n") - - # Create noun index - noun_index = [ - { - "lemma": "dog", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["02084442"], - }, - { - "lemma": "child", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["09917593"], - }, - { - "lemma": "box", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["02866578"], - }, - { - "lemma": "fly", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["01930374"], - }, - ] - - with open(data_path / "index.noun.jsonl", "w") as f: - for entry in noun_index: - f.write(json.dumps(entry) + "\n") - - # Create verb index - verb_index = [ - { - "lemma": "run", - "pos": "v", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["01926311"], - }, - { - "lemma": "fly", - "pos": "v", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["01835496"], - }, - { - "lemma": "be", - "pos": "v", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["00010435"], - }, - { - "lemma": "watch", - "pos": "v", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["00654625"], - }, - ] - - with open(data_path / "index.verb.jsonl", "w") as f: - for entry in verb_index: - f.write(json.dumps(entry) + "\n") - - # Create adjective index - adj_index = [ - { - "lemma": "big", - "pos": "a", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["00001740"], - }, - { - "lemma": "nice", - "pos": "a", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - 
"synset_offsets": ["00001741"], - }, - { - "lemma": "good", - "pos": "a", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["00001742"], - }, - { - "lemma": "well", - "pos": "a", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["00001742"], - }, - ] - - with open(data_path / "index.adj.jsonl", "w") as f: - for entry in adj_index: - f.write(json.dumps(entry) + "\n") - - # Create noun exceptions - noun_exc = [ - {"inflected_form": "children", "base_forms": ["child"]}, - {"inflected_form": "geese", "base_forms": ["goose"]}, - {"inflected_form": "men", "base_forms": ["man"]}, - {"inflected_form": "women", "base_forms": ["woman"]}, - {"inflected_form": "teeth", "base_forms": ["tooth"]}, - {"inflected_form": "feet", "base_forms": ["foot"]}, - {"inflected_form": "mice", "base_forms": ["mouse"]}, - ] - - with open(data_path / "noun.exc.jsonl", "w") as f: - for exc in noun_exc: - f.write(json.dumps(exc) + "\n") - - # Create verb exceptions - verb_exc = [ - {"inflected_form": "ran", "base_forms": ["run"]}, - {"inflected_form": "went", "base_forms": ["go"]}, - {"inflected_form": "was", "base_forms": ["be"]}, - {"inflected_form": "were", "base_forms": ["be"]}, - {"inflected_form": "been", "base_forms": ["be"]}, - {"inflected_form": "flew", "base_forms": ["fly"]}, - {"inflected_form": "flown", "base_forms": ["fly"]}, + # All exceptions in a single file with pos field + all_exceptions = [ + # Noun exceptions + {"inflected_form": "children", "base_forms": ["child"], "pos": "n"}, + {"inflected_form": "geese", "base_forms": ["goose"], "pos": "n"}, + {"inflected_form": "men", "base_forms": ["man"], "pos": "n"}, + {"inflected_form": "women", "base_forms": ["woman"], "pos": "n"}, + {"inflected_form": "teeth", "base_forms": ["tooth"], "pos": "n"}, + {"inflected_form": "feet", "base_forms": ["foot"], "pos": "n"}, + {"inflected_form": "mice", "base_forms": ["mouse"], "pos": "n"}, + # Verb exceptions + {"inflected_form": "ran", "base_forms": ["run"], "pos": "v"}, + {"inflected_form": "went", "base_forms": ["go"], "pos": "v"}, + {"inflected_form": "was", "base_forms": ["be"], "pos": "v"}, + {"inflected_form": "were", "base_forms": ["be"], "pos": "v"}, + {"inflected_form": "been", "base_forms": ["be"], "pos": "v"}, + {"inflected_form": "flew", "base_forms": ["fly"], "pos": "v"}, + {"inflected_form": "flown", "base_forms": ["fly"], "pos": "v"}, + # Adjective exceptions + {"inflected_form": "better", "base_forms": ["good", "well"], "pos": "a"}, + {"inflected_form": "best", "base_forms": ["good", "well"], "pos": "a"}, + {"inflected_form": "worse", "base_forms": ["bad"], "pos": "a"}, + {"inflected_form": "worst", "base_forms": ["bad"], "pos": "a"}, ] - with open(data_path / "verb.exc.jsonl", "w") as f: - for exc in verb_exc: - f.write(json.dumps(exc) + "\n") - - # Create adjective exceptions - adj_exc = [ - {"inflected_form": "better", "base_forms": ["good", "well"]}, - {"inflected_form": "best", "base_forms": ["good", "well"]}, - {"inflected_form": "worse", "base_forms": ["bad"]}, - {"inflected_form": "worst", "base_forms": ["bad"]}, - ] - - with open(data_path / "adj.exc.jsonl", "w") as f: - for exc in adj_exc: - f.write(json.dumps(exc) + "\n") - - # Create empty sense index (required but not used in tests) - with open(data_path / "index.sense.jsonl", "w") as f: - pass - - yield data_path + wordnet_file = _write_wordnet_files(data_path, all_synsets, all_exceptions) + yield wordnet_file 
@pytest.fixture def loader_with_data(self, temp_data_with_lemmas): @@ -581,12 +420,10 @@ def test_period_removal(self, loader_with_data): def test_ful_suffix_handling(self): """Test special handling of nouns ending with 'ful'.""" - # Create test data with "box" and "boxful" with tempfile.TemporaryDirectory() as tmpdir: data_path = Path(tmpdir) - # Create noun synsets - noun_synsets = [ + synsets = [ { "offset": "02883344", "lex_filenum": 6, @@ -607,44 +444,8 @@ def test_ful_suffix_handling(self): }, ] - with open(data_path / "data.noun.jsonl", "w") as f: - for synset in noun_synsets: - f.write(json.dumps(synset) + "\n") - - # Create index - noun_index = [ - { - "lemma": "box", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["02883344"], - }, - { - "lemma": "boxful", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["13767879"], - }, - ] - - with open(data_path / "index.noun.jsonl", "w") as f: - for entry in noun_index: - f.write(json.dumps(entry) + "\n") - - # Create empty files - with open(data_path / "index.sense.jsonl", "w") as f: - pass - - # Load and test - loader = WordNetLoader(data_path) + wordnet_file = _write_wordnet_files(data_path, synsets) + loader = WordNetLoader(wordnet_file) loader.load() processor = Morphy(loader) @@ -654,12 +455,10 @@ def test_ful_suffix_handling(self): def test_collocation_simple(self): """Test simple multi-word expressions.""" - # Create test data with "attorney_general" with tempfile.TemporaryDirectory() as tmpdir: data_path = Path(tmpdir) - # Create noun synset with multi-word expression - noun_synsets = [ + synsets = [ { "offset": "09780632", "lex_filenum": 15, @@ -689,54 +488,8 @@ def test_collocation_simple(self): }, ] - with open(data_path / "data.noun.jsonl", "w") as f: - for synset in noun_synsets: - f.write(json.dumps(synset) + "\n") - - # Create index - noun_index = [ - { - "lemma": "attorney", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["09780632"], - }, - { - "lemma": "general", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["10260706"], - }, - { - "lemma": "attorney_general", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["09781263"], - }, - ] - - with open(data_path / "index.noun.jsonl", "w") as f: - for entry in noun_index: - f.write(json.dumps(entry) + "\n") - - # Create empty files - with open(data_path / "index.sense.jsonl", "w") as f: - pass - - # Load and test - loader = WordNetLoader(data_path) + wordnet_file = _write_wordnet_files(data_path, synsets) + loader = WordNetLoader(wordnet_file) loader.load() processor = Morphy(loader) @@ -746,12 +499,10 @@ def test_collocation_simple(self): def test_hyphenated_words(self): """Test hyphenated multi-word expressions.""" - # Create test data with tempfile.TemporaryDirectory() as tmpdir: data_path = Path(tmpdir) - # Create noun synsets - noun_synsets = [ + synsets = [ { "offset": "10639637", "lex_filenum": 15, @@ -772,44 +523,8 @@ def test_hyphenated_words(self): }, ] - with open(data_path / "data.noun.jsonl", "w") as f: - for synset in noun_synsets: - f.write(json.dumps(synset) + "\n") - - # Create index - noun_index = [ - { - "lemma": "son", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - 
"ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["10639637"], - }, - { - "lemma": "son_in_law", - "pos": "n", - "synset_cnt": 1, - "p_cnt": 0, - "ptr_symbols": [], - "sense_cnt": 1, - "tagsense_cnt": 0, - "synset_offsets": ["10105733"], - }, - ] - - with open(data_path / "index.noun.jsonl", "w") as f: - for entry in noun_index: - f.write(json.dumps(entry) + "\n") - - # Create empty files - with open(data_path / "index.sense.jsonl", "w") as f: - pass - - # Load and test - loader = WordNetLoader(data_path) + wordnet_file = _write_wordnet_files(data_path, synsets) + loader = WordNetLoader(wordnet_file) loader.load() processor = Morphy(loader) From b5cc8553174e76be29f5803fb84a784b6efc9f96 Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Thu, 5 Feb 2026 19:34:45 -0500 Subject: [PATCH 02/11] Adds frame relation, LU enrichment, semantic type, and fulltext parsing to FrameNet converter and loader. --- pyproject.toml | 3 +- src/glazing/framenet/converter.py | 655 ++++++++++++++++++++++++++++++ src/glazing/framenet/loader.py | 45 +- 3 files changed, 699 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4b07a5b..ab89ebd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -125,6 +125,7 @@ ignore = [ "tests/test_verbnet/test_converter.py" = ["E501", "S314"] "tests/test_verbnet/test_inheritance.py" = ["E501"] "tests/test_wordnet/test_converter.py" = ["E501"] +"tests/test_integration/test_converter_loader_roundtrip.py" = ["E501"] "tests/test_cli/test_download.py" = ["E501", "EM102"] "tests/test_downloader.py" = ["E501", "F841"] "tests/test_verbnet/test_downloader.py" = ["E501", "F841"] @@ -136,7 +137,7 @@ ignore = [ "src/glazing/wordnet/converter.py" = ["C901", "PLR0912", "PLR0915"] "src/glazing/wordnet/loader.py" = ["C901", "PLR0912"] "src/glazing/verbnet/converter.py" = ["C901", "S320"] -"src/glazing/framenet/converter.py" = ["S320"] +"src/glazing/framenet/converter.py" = ["S320", "C901", "PLR0912", "PLR0915"] "src/glazing/propbank/converter.py" = ["S320"] [tool.ruff.lint.pydocstyle] diff --git a/src/glazing/framenet/converter.py b/src/glazing/framenet/converter.py index 60081cd..a3054d5 100644 --- a/src/glazing/framenet/converter.py +++ b/src/glazing/framenet/converter.py @@ -38,22 +38,54 @@ import html from datetime import UTC, datetime from pathlib import Path +from typing import get_args from lxml import etree from glazing.framenet.models import ( AnnotatedText, + AnnotationLayer, + AnnotationSet, + FERealization, + FERelation, Frame, FrameElement, + FrameRelation, + Label, Lexeme, LexicalUnit, + SemanticType, + SemTypeRef, + Sentence, SentenceCount, + ValenceAnnotationPattern, + ValencePattern, + ValenceRealizationPattern, + ValenceUnit, +) +from glazing.framenet.types import ( + AnnotationStatus, + LayerType, ) from glazing.utils.xml_parser import ( parse_attributes, parse_with_schema, ) +# Map from frRelation.xml relation type names to (sub_type, super_type) pairs. +# sub_type is the relation from the sub-frame's perspective; +# super_type is the relation from the super-frame's perspective (None if one-directional). 
+FRAME_RELATION_TYPE_MAP: dict[str, tuple[str, str | None]] = { + "Inheritance": ("Inherits from", "Is Inherited by"), + "Using": ("Uses", "Is Used by"), + "Subframe": ("Subframe of", "Has Subframe(s)"), + "Precedes": ("Precedes", "Is Preceded by"), + "Perspective_on": ("Perspective on", "Is Perspectivized in"), + "Causative_of": ("Is Causative of", None), + "Inchoative_of": ("Is Inchoative of", None), + "See_also": ("See also", "See also"), +} + class FrameNetConverter: """Convert FrameNet XML files to JSON Lines format. @@ -80,6 +112,14 @@ class FrameNetConverter: Convert a lexical unit XML file to LexicalUnit model. convert_frames_directory(input_dir, output_file) Convert all frames in a directory to JSON Lines. + convert_frame_relations_file(filepath) + Convert frRelation.xml to frame relation mappings. + convert_semtypes_file(filepath, output_file) + Convert semTypes.xml to JSON Lines. + convert_fulltext_file(filepath) + Convert a fulltext XML file to Sentence models. + convert_fulltext_directory(input_dir, output_file) + Convert all fulltext files in a directory to JSON Lines. """ def __init__( @@ -99,6 +139,22 @@ def __init__( self.namespace = namespace self.ns = {"fn": namespace} if namespace else {} self.validate_schema = validate_schema + self._ns_prefix = f"{{{namespace}}}" if namespace else "" + + def _tag(self, local_name: str) -> str: + """Build a namespace-qualified tag name. + + Parameters + ---------- + local_name : str + The local element name. + + Returns + ------- + str + Namespace-qualified tag name. + """ + return f"{self._ns_prefix}{local_name}" def _parse_definition(self, element: etree._Element | None) -> AnnotatedText: """Parse a definition element with embedded markup. @@ -499,6 +555,574 @@ def convert_lu_index_file(self, filepath: Path | str) -> list[LexicalUnit]: return lexical_units + def convert_frame_relations_file(self, filepath: Path | str) -> dict[int, list[FrameRelation]]: + """Convert frRelation.xml to frame relation mappings. + + Parses the frame relation types and individual frame relations, + creating FrameRelation objects grouped by frame ID. + + Parameters + ---------- + filepath : Path | str + Path to frRelation.xml file. + + Returns + ------- + dict[int, list[FrameRelation]] + Dictionary mapping frame IDs to their FrameRelation objects. 
+ + Examples + -------- + >>> converter = FrameNetConverter() + >>> relations = converter.convert_frame_relations_file("frRelation.xml") + >>> print(f"Found relations for {len(relations)} frames") + """ + filepath = Path(filepath) + + tree = etree.parse(str(filepath)) + root = tree.getroot() + + relations_by_frame: dict[int, list[FrameRelation]] = {} + + for rel_type_elem in root.findall(self._tag("frameRelationType")): + type_name = rel_type_elem.get("name", "") + + if type_name not in FRAME_RELATION_TYPE_MAP: + continue + + sub_type, super_type = FRAME_RELATION_TYPE_MAP[type_name] + + for fr_elem in rel_type_elem.findall(self._tag("frameRelation")): + sub_frame_id = int(fr_elem.get("subID", "0")) + sup_frame_id = int(fr_elem.get("supID", "0")) + sub_frame_name = fr_elem.get("subFrameName", "") + super_frame_name = fr_elem.get("superFrameName", "") + relation_id = int(fr_elem.get("ID", "0")) + + # Parse FE relations + fe_relations: list[FERelation] = [] + for fe_rel_elem in fr_elem.findall(self._tag("FERelation")): + try: + fe_rel = FERelation( # type: ignore[call-arg] + sub_fe_id=int(fe_rel_elem.get("subID", "0")), + sub_fe_name=fe_rel_elem.get("subFEName"), + super_fe_id=int(fe_rel_elem.get("supID", "0")), + super_fe_name=fe_rel_elem.get("superFEName"), + ) + fe_relations.append(fe_rel) + except (ValueError, TypeError): + continue + + # Create FrameRelation for the sub-frame's perspective + try: + sub_relation = FrameRelation( + id=relation_id, + type=sub_type, # type: ignore[arg-type] + sub_frame_id=sub_frame_id, + sub_frame_name=sub_frame_name, + super_frame_id=sup_frame_id, + super_frame_name=super_frame_name, + fe_relations=fe_relations, + ) + relations_by_frame.setdefault(sub_frame_id, []).append(sub_relation) + except (ValueError, TypeError): + pass + + # Create FrameRelation for the super-frame's perspective (if applicable) + if super_type is not None: + try: + super_relation = FrameRelation( + id=relation_id, + type=super_type, # type: ignore[arg-type] + sub_frame_id=sub_frame_id, + sub_frame_name=sub_frame_name, + super_frame_id=sup_frame_id, + super_frame_name=super_frame_name, + fe_relations=fe_relations, + ) + relations_by_frame.setdefault(sup_frame_id, []).append(super_relation) + except (ValueError, TypeError): + pass + + return relations_by_frame + + def convert_lu_file( + self, filepath: Path | str + ) -> tuple[list[ValencePattern], list[SemTypeRef], list[AnnotationSet]]: + """Convert an individual lu/*.xml file to extract valence patterns and semtypes. + + Parses valence patterns (FE realizations and their syntactic patterns), + semantic type references, and annotation sets from a lexical unit file. + + Parameters + ---------- + filepath : Path | str + Path to individual lu XML file (e.g., lu/lu10.xml). + + Returns + ------- + tuple[list[ValencePattern], list[SemTypeRef], list[AnnotationSet]] + Tuple of (valence_patterns, semtypes, annotation_sets). 
+ + Examples + -------- + >>> converter = FrameNetConverter() + >>> patterns, semtypes, annosets = converter.convert_lu_file("lu/lu10.xml") + >>> print(f"Found {len(patterns)} valence patterns") + """ + filepath = Path(filepath) + + tree = etree.parse(str(filepath)) + root = tree.getroot() + + # Parse semantic types (direct children of root) + semtypes: list[SemTypeRef] = [] + for semtype_elem in root.findall(self._tag("semType")): + st_name = semtype_elem.get("name") + st_id = semtype_elem.get("ID") + if st_name and st_id: + try: + semtypes.append(SemTypeRef(name=st_name, id=int(st_id))) + except (ValueError, TypeError): + continue + + # Parse valence patterns from element + valence_patterns: list[ValencePattern] = [] + valences_elem = root.find(self._tag("valences")) + if valences_elem is not None: + # Parse FE realizations + fe_realizations: list[FERealization] = [] + for fe_real_elem in valences_elem.findall(self._tag("FERealization")): + fe_real_total = int(fe_real_elem.get("total", "0")) + + # Get FE name from child element + fe_child = fe_real_elem.find(self._tag("FE")) + fe_name = fe_child.get("name", "") if fe_child is not None else "" + + if not fe_name: + continue + + # Parse patterns within this FE realization + patterns: list[ValenceRealizationPattern] = [] + for pattern_elem in fe_real_elem.findall(self._tag("pattern")): + pattern_total = int(pattern_elem.get("total", "0")) + + # Parse valence units + valence_units: list[ValenceUnit] = [] + for vu_elem in pattern_elem.findall(self._tag("valenceUnit")): + try: + vu = ValenceUnit( + GF=vu_elem.get("GF", ""), + PT=vu_elem.get("PT", ""), + FE=vu_elem.get("FE", ""), + ) + valence_units.append(vu) + except (ValueError, TypeError): + continue + + # Parse annotation set IDs + anno_set_ids: list[int] = [] + for anno_elem in pattern_elem.findall(self._tag("annoSet")): + anno_id = anno_elem.get("ID") + if anno_id: + anno_set_ids.append(int(anno_id)) + + if valence_units and pattern_total > 0: + try: + patterns.append( + ValenceRealizationPattern( + valence_units=valence_units, + anno_set_ids=anno_set_ids, + total=pattern_total, + ) + ) + except (ValueError, TypeError): + continue + + try: + fe_realizations.append( + FERealization( + fe_name=fe_name, + total=fe_real_total, + patterns=patterns, + ) + ) + except (ValueError, TypeError): + continue + + # Build a single ValencePattern if we have FE realizations + if fe_realizations: + # Compute total annotated from the root or LU attributes + total_annotated = int(root.get("totalAnnotated", "0")) + + # Parse FEGroupRealization / ValenceAnnotationPattern entries + valence_anno_patterns: list[ValenceAnnotationPattern] = [] + # These come from elements in the valences section + # (not all LU files have these) + + valence_patterns.append( + ValencePattern( + total_annotated=total_annotated, + fe_realizations=fe_realizations, + patterns=valence_anno_patterns, + ) + ) + + # Parse annotation sets (from sections) + annotation_sets: list[AnnotationSet] = [] + # Annotation sets in lu files are nested inside subCorpus > sentence > annotationSet + # We collect them but don't return full sentences here + for subcorpus_elem in root.findall(self._tag("subCorpus")): + for sentence_elem in subcorpus_elem.findall(self._tag("sentence")): + sent_id = int(sentence_elem.get("ID", "0")) + for annoset_elem in sentence_elem.findall(self._tag("annotationSet")): + try: + annoset = self._parse_annotation_set(annoset_elem, sent_id) + if annoset is not None: + annotation_sets.append(annoset) + except (ValueError, 
TypeError): + continue + + return valence_patterns, semtypes, annotation_sets + + def convert_semtypes_file(self, filepath: Path | str, output_file: Path | str) -> int: + """Convert semTypes.xml to JSON Lines format. + + Parses the semantic type hierarchy and writes each type as a JSON line. + + Parameters + ---------- + filepath : Path | str + Path to semTypes.xml file. + output_file : Path | str + Output JSON Lines file path. + + Returns + ------- + int + Number of semantic types converted. + + Examples + -------- + >>> converter = FrameNetConverter() + >>> count = converter.convert_semtypes_file("semTypes.xml", "semtypes.jsonl") + >>> print(f"Converted {count} semantic types") + """ + filepath = Path(filepath) + output_file = Path(output_file) + + tree = etree.parse(str(filepath)) + root = tree.getroot() + + # semTypes.xml uses the FrameNet namespace + semtype_tag = self._tag("semType") + definition_tag = self._tag("definition") + supertype_tag = self._tag("superType") + + semantic_types: list[SemanticType] = [] + + for st_elem in root.findall(semtype_tag): + st_id = st_elem.get("ID") + st_name = st_elem.get("name", "") + st_abbrev = st_elem.get("abbrev", "") + + if not st_id or not st_name: + continue + + # Parse definition + def_elem = st_elem.find(definition_tag) + definition_text = "" + if def_elem is not None and def_elem.text: + definition_text = def_elem.text.strip() + if not definition_text: + definition_text = f"Semantic type: {st_name}" + + # Parse super type + super_type_id = None + super_type_name = None + sup_elem = st_elem.find(supertype_tag) + if sup_elem is not None: + sup_id = sup_elem.get("supID") + sup_name = sup_elem.get("superTypeName") + if sup_id: + super_type_id = int(sup_id) + super_type_name = sup_name + + try: + sem_type = SemanticType( + id=int(st_id), + name=st_name, + abbrev=st_abbrev if st_abbrev else st_name, + definition=definition_text, + super_type_id=super_type_id, + super_type_name=super_type_name, + root_type_id=None, + root_type_name=None, + ) + semantic_types.append(sem_type) + except (ValueError, TypeError) as e: + print(f"Warning: Failed to parse semantic type '{st_name}': {e}") + continue + + # Write to output file + count = 0 + with output_file.open("w", encoding="utf-8") as f: + for sem_type in semantic_types: + json_line = sem_type.model_dump_json(exclude_none=True) + f.write(json_line + "\n") + count += 1 + + return count + + def _parse_annotation_set( + self, annoset_elem: etree._Element, sentence_id: int + ) -> AnnotationSet | None: + """Parse an annotation set element. + + Parameters + ---------- + annoset_elem : etree._Element + The annotationSet XML element. + sentence_id : int + ID of the containing sentence. + + Returns + ------- + AnnotationSet | None + Parsed annotation set, or None if invalid. 
+ """ + anno_id = annoset_elem.get("ID") + status = annoset_elem.get("status", "") + + if not anno_id: + return None + + # Validate status against allowed values + valid_statuses = get_args(AnnotationStatus.__value__) + if status not in valid_statuses: + return None + + # Parse created_by and created_date + cby = annoset_elem.get("cBy") + cdate_str = annoset_elem.get("cDate") + cdate = self._parse_datetime(cdate_str) if cdate_str else None + + # Parse layers + layers: list[AnnotationLayer] = [] + valid_layer_types = get_args(LayerType.__value__) + + for layer_elem in annoset_elem.findall(self._tag("layer")): + layer_name = layer_elem.get("name", "") + layer_rank = int(layer_elem.get("rank", "1")) + + if layer_name not in valid_layer_types: + continue + + # Parse labels + labels: list[Label] = [] + for label_elem in layer_elem.findall(self._tag("label")): + label_name = label_elem.get("name", "") + if not label_name: + continue + + start_str = label_elem.get("start") + end_str = label_elem.get("end") + itype = label_elem.get("itype") + label_id_str = label_elem.get("ID") + fe_id_str = label_elem.get("feID") + + # Handle null instantiation labels (no start/end attributes) + if itype and (start_str is None or end_str is None): + # Null instantiation: set start=0, end=0 + start_val = 0 + end_val = 0 + is_null = True + elif start_str is not None and end_str is not None: + start_val = int(start_str) + end_val = int(end_str) + is_null = bool(itype) + else: + # Labels without start/end and without itype - skip + continue + + # Validate positions + if start_val < 0 or end_val < start_val: + if is_null: + start_val = 0 + end_val = 0 + else: + continue + + try: + label = Label( + id=int(label_id_str) if label_id_str else None, + name=label_name, + start=start_val, + end=end_val, + fe_id=int(fe_id_str) if fe_id_str else None, + itype=is_null, + ) + labels.append(label) + except (ValueError, TypeError): + continue + + try: + layers.append( + AnnotationLayer( + name=layer_name, # type: ignore[arg-type] + rank=layer_rank, + labels=labels, + ) + ) + except (ValueError, TypeError): + continue + + try: + return AnnotationSet( + id=int(anno_id), + status=status, # type: ignore[arg-type] + sentence_id=sentence_id, + layers=layers, + cBy=cby, + cDate=cdate, + ) + except (ValueError, TypeError): + return None + + def convert_fulltext_file(self, filepath: Path | str) -> list[Sentence]: + """Convert a fulltext/*.xml file to Sentence models. + + Parses annotated corpus sentences with their annotation sets, + layers, and labels. + + Parameters + ---------- + filepath : Path | str + Path to fulltext XML file. + + Returns + ------- + list[Sentence] + List of parsed Sentence models. 
+ + Examples + -------- + >>> converter = FrameNetConverter() + >>> sentences = converter.convert_fulltext_file("fulltext/ANC__110CYL067.xml") + >>> print(f"Found {len(sentences)} sentences") + """ + filepath = Path(filepath) + + tree = etree.parse(str(filepath)) + root = tree.getroot() + + sentences: list[Sentence] = [] + + for sent_elem in root.findall(self._tag("sentence")): + sent_id_str = sent_elem.get("ID") + if not sent_id_str: + continue + sent_id = int(sent_id_str) + + # Get sentence text + text_elem = sent_elem.find(self._tag("text")) + if text_elem is None or not text_elem.text: + continue + text = text_elem.text + + # Get sentence metadata + parag_no_str = sent_elem.get("paragNo") + sent_no_str = sent_elem.get("sentNo") + corp_id_str = sent_elem.get("corpID") + doc_id_str = sent_elem.get("docID") + apos_str = sent_elem.get("aPos") + + # Parse annotation sets + annotation_sets: list[AnnotationSet] = [] + for annoset_elem in sent_elem.findall(self._tag("annotationSet")): + try: + annoset = self._parse_annotation_set(annoset_elem, sent_id) + if annoset is not None: + annotation_sets.append(annoset) + except (ValueError, TypeError): + continue + + try: + sentence = Sentence( + id=sent_id, + text=text, + paragNo=int(parag_no_str) if parag_no_str else None, + sentNo=int(sent_no_str) if sent_no_str else None, + corpID=int(corp_id_str) if corp_id_str else None, + docID=int(doc_id_str) if doc_id_str else None, + apos=int(apos_str) if apos_str else None, + annotation_sets=annotation_sets, + ) + sentences.append(sentence) + except (ValueError, TypeError) as e: + print(f"Warning: Failed to parse sentence {sent_id}: {e}") + continue + + return sentences + + def convert_fulltext_directory( + self, + input_dir: Path | str, + output_file: Path | str, + pattern: str = "*.xml", + ) -> int: + """Convert all fulltext files in a directory to JSON Lines. + + Parameters + ---------- + input_dir : Path | str + Directory containing fulltext XML files. + output_file : Path | str + Output JSON Lines file path. + pattern : str, default="*.xml" + File pattern to match. + + Returns + ------- + int + Number of sentences converted. + + Examples + -------- + >>> converter = FrameNetConverter() + >>> count = converter.convert_fulltext_directory( + ... "framenet_v17/fulltext", + ... "fulltext.jsonl" + ... ) + >>> print(f"Converted {count} sentences") + """ + input_dir = Path(input_dir) + output_file = Path(output_file) + + count = 0 + errors: list[tuple[Path, Exception]] = [] + + with output_file.open("w", encoding="utf-8") as f: + for xml_file in sorted(input_dir.glob(pattern)): + try: + sentences = self.convert_fulltext_file(xml_file) + for sentence in sentences: + json_line = sentence.model_dump_json(exclude_none=True) + f.write(json_line + "\n") + count += 1 + except (etree.XMLSyntaxError, ValueError, TypeError) as e: + errors.append((xml_file, e)) + + if errors: + error_details = "\n".join(f" - {file}: {error}" for file, error in errors) + total_files = len(list(input_dir.glob(pattern))) + error_msg = ( + f"Failed to convert {len(errors)} out of {total_files} files:\n{error_details}" + ) + raise RuntimeError(error_msg) + + return count + def convert_frames_directory( self, input_dir: Path | str, @@ -509,6 +1133,8 @@ def convert_frames_directory( This method parses frame XML files and associates them with lexical units from luIndex.xml (expected to be in the parent directory of input_dir). 
+ It also loads frame relations from frRelation.xml and enriches LUs with + valence patterns and semantic types from individual lu/*.xml files. Parameters ---------- @@ -570,6 +1196,35 @@ def convert_frames_directory( for frame in frames: frame.lexical_units = lu_by_frame.get(frame.id, []) + # Load frame relations from frRelation.xml + fr_relation_path = parent_dir / "frRelation.xml" + if fr_relation_path.exists(): + try: + relations_by_frame = self.convert_frame_relations_file(fr_relation_path) + for frame in frames: + frame.frame_relations = relations_by_frame.get(frame.id, []) + except (etree.XMLSyntaxError, ValueError, TypeError) as e: + print(f"Warning: Failed to load frame relations from {fr_relation_path}: {e}") + + # Enrich LUs with valence patterns and semtypes from individual lu/*.xml files + lu_dir = parent_dir / "lu" + if lu_dir.is_dir(): + for frame in frames: + for lu in frame.lexical_units: + lu_file = lu_dir / f"lu{lu.id}.xml" + if lu_file.exists(): + try: + valence_patterns, semtypes, _annotation_sets = self.convert_lu_file( + lu_file + ) + if valence_patterns: + lu.valence_patterns = valence_patterns + if semtypes: + lu.semtypes = semtypes + except (etree.XMLSyntaxError, ValueError, TypeError) as e: + print(f"Warning: Failed to parse LU file {lu_file}: {e}") + continue + # Write frames with LUs to output file count = 0 with output_file.open("w", encoding="utf-8") as f: diff --git a/src/glazing/framenet/loader.py b/src/glazing/framenet/loader.py index 5d91108..8d5e6df 100644 --- a/src/glazing/framenet/loader.py +++ b/src/glazing/framenet/loader.py @@ -36,7 +36,7 @@ from collections import defaultdict from pathlib import Path -from glazing.framenet.models import Frame, LexicalUnit, SemanticType +from glazing.framenet.models import Frame, LexicalUnit, SemanticType, Sentence from glazing.framenet.types import FrameID from glazing.initialize import get_default_data_path @@ -452,14 +452,15 @@ def load_lexical_units( return lexical_units def load_semantic_types( - self, filepath: Path | str, skip_errors: bool = False + self, filepath: Path | str | None = None, skip_errors: bool = False ) -> list[SemanticType]: """Load SemanticType models from JSON Lines file. Parameters ---------- - filepath : Path | str + filepath : Path | str | None, optional Path to JSON Lines file containing SemanticType data. + If None, looks for ``framenet_semtypes.jsonl`` alongside the primary data file. skip_errors : bool, default=False If True, skip invalid lines rather than raising errors. @@ -475,6 +476,8 @@ def load_semantic_types( ValueError If skip_errors=False and a line fails validation. """ + if filepath is None: + filepath = self.data_path.parent / "framenet_semtypes.jsonl" filepath = Path(filepath) if not filepath.exists(): msg = f"FrameNet semantic types file not found: {filepath}" @@ -486,6 +489,42 @@ def load_semantic_types( return sem_types + def load_fulltext( + self, filepath: Path | str | None = None, skip_errors: bool = False + ) -> list[Sentence]: + """Load Sentence models from fulltext JSON Lines file. + + Parameters + ---------- + filepath : Path | str | None, optional + Path to JSON Lines file containing Sentence data. + If None, looks for ``framenet_fulltext.jsonl`` alongside the primary data file. + skip_errors : bool, default=False + If True, skip invalid lines rather than raising errors. + + Returns + ------- + list[Sentence] + List of loaded Sentence models. + + Raises + ------ + FileNotFoundError + If the input file does not exist. 
+ """ + if filepath is None: + filepath = self.data_path.parent / "framenet_fulltext.jsonl" + filepath = Path(filepath) + if not filepath.exists(): + msg = f"FrameNet fulltext file not found: {filepath}" + raise FileNotFoundError(msg) + + sentences = [] + for sentence in Sentence.from_json_lines_file(filepath, skip_errors=skip_errors): + sentences.append(sentence) + + return sentences + def build_frame_index(self, frames: list[Frame]) -> FrameIndex: """Build searchable index from frames data. From 6486fa8d3ab0c2014b6edaadd5234c0a01addb93 Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Thu, 5 Feb 2026 19:38:56 -0500 Subject: [PATCH 03/11] Fixes VerbNet converter to populate framenet_mappings and propbank_mappings from member attributes. --- pyproject.toml | 2 +- src/glazing/verbnet/converter.py | 49 +++++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ab89ebd..4af2bf1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,7 +136,7 @@ ignore = [ "src/glazing/utils/xml_parser.py" = ["S320"] "src/glazing/wordnet/converter.py" = ["C901", "PLR0912", "PLR0915"] "src/glazing/wordnet/loader.py" = ["C901", "PLR0912"] -"src/glazing/verbnet/converter.py" = ["C901", "S320"] +"src/glazing/verbnet/converter.py" = ["C901", "S320", "PLR0912"] "src/glazing/framenet/converter.py" = ["S320", "C901", "PLR0912", "PLR0915"] "src/glazing/propbank/converter.py" = ["S320"] diff --git a/src/glazing/verbnet/converter.py b/src/glazing/verbnet/converter.py index 5a6452d..e843326 100644 --- a/src/glazing/verbnet/converter.py +++ b/src/glazing/verbnet/converter.py @@ -39,11 +39,17 @@ from __future__ import annotations import re +from datetime import UTC, datetime from pathlib import Path from typing import cast from lxml import etree +from glazing.references.models import ( + CrossReference, + MappingMetadata, + VerbNetFrameNetMapping, +) from glazing.types import LogicType from glazing.utils.xml_parser import parse_attributes from glazing.verbnet.models import ( @@ -268,8 +274,8 @@ def _parse_members(self, element: etree._Element) -> list[Member]: # Parse cross-references from attributes features = {} wn_senses = str(attrs.get("wn", "")).strip() - str(attrs.get("grouping", "")).strip() - str(attrs.get("fn_mapping", "")).strip() + grouping = str(attrs.get("grouping", "")).strip() + fn_mapping = str(attrs.get("fn_mapping", "")).strip() features_str = str(attrs.get("features", "")).strip() # Parse features if present @@ -292,14 +298,49 @@ def _parse_members(self, element: etree._Element) -> list[Member]: # Skip invalid percentage notation continue + # Parse FrameNet mappings from fn_mapping attribute + framenet_mappings: list[VerbNetFrameNetMapping] = [] + if fn_mapping and fn_mapping != "None": + framenet_mappings.append( + VerbNetFrameNetMapping( + frame_name=fn_mapping, + confidence=None, + mapping_source="manual", + role_mappings=[], + ) + ) + + # Parse PropBank mappings from grouping attribute + propbank_mappings: list[CrossReference] = [] + if grouping and grouping != "None": + mapping_metadata = MappingMetadata( + created_date=datetime.now(tz=UTC), + created_by="verbnet_xml", + version="3.4", + validation_status="unvalidated", + ) + for roleset_id in grouping.split(): + if roleset_id.strip(): + propbank_mappings.append( + CrossReference( + source_dataset="verbnet", + source_id=verbnet_key, + source_version="3.4", + target_dataset="propbank", + target_id=roleset_id.strip(), + mapping_type="direct", + metadata=mapping_metadata, 
+ ) + ) + # Create member model member = Member( name=name, verbnet_key=verbnet_key, wordnet_mappings=wordnet_mappings, + framenet_mappings=framenet_mappings, + propbank_mappings=propbank_mappings, features=features, - # PropBank and FrameNet mappings would be parsed from - # grouping and fn_mapping attributes here in a full implementation ) members.append(member) From 1e5e275fbbeccbf356ba4e33860d16c8d1298b2e Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Thu, 5 Feb 2026 19:39:09 -0500 Subject: [PATCH 04/11] Adds AMR-UMR-91 roleset conversion and XML fixes to PropBank converter. --- src/glazing/propbank/converter.py | 82 +++++++++++++++++++++++++++--- src/glazing/utils/special_cases.py | 9 +++- 2 files changed, 82 insertions(+), 9 deletions(-) diff --git a/src/glazing/propbank/converter.py b/src/glazing/propbank/converter.py index 8489524..8a360e2 100644 --- a/src/glazing/propbank/converter.py +++ b/src/glazing/propbank/converter.py @@ -33,6 +33,7 @@ from pathlib import Path from lxml import etree +from pydantic import ValidationError from glazing.propbank.models import ( Alias, @@ -281,14 +282,18 @@ def _parse_propbank_annotation(self, propbank_elem: etree._Element) -> PropBankA with contextlib.suppress(ValueError, TypeError): end = int(end) - args.append( - Arg( - type=str(attrs["type"]), # type: ignore[arg-type] - start=start, # type: ignore[arg-type] - end=end, # type: ignore[arg-type] - text=arg.text, + try: + args.append( + Arg( + type=str(attrs["type"]), # type: ignore[arg-type] + start=start, # type: ignore[arg-type] + end=end, # type: ignore[arg-type] + text=arg.text, + ) ) - ) + except ValidationError: + # Skip args with non-standard types (e.g., AMR annotations) + continue # PropBankAnnotation expects a single Rel, not a list # Handle missing rel element (some annotations don't have it) @@ -486,6 +491,52 @@ def convert_frameset_file(self, filepath: Path | str) -> Frameset: return Frameset(predicate_lemma=predicate_lemma, rolesets=rolesets, notes=notes) + def convert_combined_frameset_file(self, filepath: Path | str) -> list[Frameset]: + """Convert a combined frameset XML file with multiple predicates. + + Handles files like AMR-UMR-91-rolesets.xml where a single + root contains multiple children. + + Parameters + ---------- + filepath : Path | str + Path to combined frameset XML file. + + Returns + ------- + list[Frameset] + List of parsed Frameset model instances, one per predicate. + """ + filepath = Path(filepath) + xml_content = filepath.read_text(encoding="utf-8") + xml_content = self._fix_xml_errors(xml_content, filepath) + + tree = etree.parse(BytesIO(xml_content.encode("utf-8"))) + root = tree.getroot() + + framesets: list[Frameset] = [] + for predicate_elem in root.findall("predicate"): + predicate_lemma = predicate_elem.get("lemma", "") + + rolesets = [] + for roleset in predicate_elem.findall("roleset"): + try: + rolesets.append(self._parse_roleset(roleset)) + except (ValidationError, ValueError, TypeError): + # Skip rolesets with non-standard values (e.g., AMR-specific types) + continue + + notes = [] + for note in predicate_elem.findall("note"): + if note.text: + notes.append(note.text) + + framesets.append( + Frameset(predicate_lemma=predicate_lemma, rolesets=rolesets, notes=notes) + ) + + return framesets + def convert_framesets_directory( self, input_dir: Path | str, @@ -494,6 +545,9 @@ def convert_framesets_directory( ) -> int: """Convert all frameset files in a directory to JSON Lines. 
+ Also processes combined frameset files (e.g., AMR-UMR-91-rolesets.xml) + found in the parent directory. + Parameters ---------- input_dir : Path | str @@ -525,16 +579,28 @@ def convert_framesets_directory( errors: list[tuple[Path, Exception]] = [] with output_file.open("w", encoding="utf-8") as f: + # Convert individual frameset files for xml_file in sorted(input_dir.glob(pattern)): try: frameset = self.convert_frameset_file(xml_file) - # Write as JSON Lines json_line = frameset.model_dump_json(exclude_none=True) f.write(json_line + "\n") count += 1 except (etree.XMLSyntaxError, ValueError, TypeError) as e: errors.append((xml_file, e)) + # Also process combined frameset files in parent directory + amr_file = input_dir.parent / "AMR-UMR-91-rolesets.xml" + if amr_file.exists(): + try: + amr_framesets = self.convert_combined_frameset_file(amr_file) + for frameset in amr_framesets: + json_line = frameset.model_dump_json(exclude_none=True) + f.write(json_line + "\n") + count += 1 + except (etree.XMLSyntaxError, ValueError, TypeError) as e: + errors.append((amr_file, e)) + # If there were any errors, raise an exception with details if errors: error_details = "\n".join(f" - {file}: {error}" for file, error in errors) diff --git a/src/glazing/utils/special_cases.py b/src/glazing/utils/special_cases.py index 9d2def5..6ae1147 100644 --- a/src/glazing/utils/special_cases.py +++ b/src/glazing/utils/special_cases.py @@ -30,7 +30,14 @@ class SpecialCaseRegistry: "replacement": ">in", "description": "Mismatched closing tag", } - ] + ], + "AMR-UMR-91-rolesets.xml": [ + { + "pattern": " \n ", + "replacement": " ", + "description": "Duplicate closing tag in reference-illustration.91", + } + ], } PROPBANK_ROLESET_EXCEPTIONS: ClassVar[dict[str, str]] = { From 38ec4fe2abc14d3a062aa32a569df4df37787fc5 Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Thu, 5 Feb 2026 19:57:50 -0500 Subject: [PATCH 05/11] Updates initialize.py to convert supplementary WordNet and FrameNet data files. 
--- src/glazing/initialize.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/glazing/initialize.py b/src/glazing/initialize.py index 7dde27b..9e3fb2b 100644 --- a/src/glazing/initialize.py +++ b/src/glazing/initialize.py @@ -168,6 +168,30 @@ def _process_dataset(name: str, data_dir: Path, verbose: bool) -> bool: return True +def _convert_wordnet_supplementary( + converter: object, source: Path, converted_dir: Path, verbose: bool +) -> None: + sense_count = converter.convert_sense_index(source, converted_dir / "wordnet_senses.jsonl") # type: ignore[attr-defined] + exc_count = converter.convert_exceptions(source, converted_dir / "wordnet_exceptions.jsonl") # type: ignore[attr-defined] + if verbose: + click.echo(f" ✓ Converted {sense_count} senses, {exc_count} exceptions") + + +def _convert_framenet_supplementary( + converter: object, download_path: Path, converted_dir: Path, verbose: bool +) -> None: + semtype_count = converter.convert_semtypes_file( # type: ignore[attr-defined] + download_path / "semTypes.xml", converted_dir / "framenet_semtypes.jsonl" + ) + if verbose: + click.echo(f" ✓ Converted {semtype_count} semantic types") + fulltext_count = converter.convert_fulltext_directory( # type: ignore[attr-defined] + download_path / "fulltext", converted_dir / "framenet_fulltext.jsonl" + ) + if verbose: + click.echo(f" ✓ Converted {fulltext_count} fulltext sentences") + + def _convert_dataset( name: str, download_path: Path, output: Path, converter: object, verbose: bool ) -> None: @@ -200,13 +224,15 @@ def _convert_dataset( source = download_path stats = converter.convert_wordnet_database(source, output) # type: ignore[attr-defined] if verbose: - synset_count = sum(v for k, v in stats.items() if k.startswith("synsets_")) - click.echo(f" ✓ Converted {synset_count} synsets") + total = stats.get("total_synsets", 0) + click.echo(f" ✓ Converted {total} synsets") + _convert_wordnet_supplementary(converter, source, output.parent, verbose) elif name == "framenet": source = download_path / "frame" count = converter.convert_frames_directory(source, output) # type: ignore[attr-defined] if verbose: click.echo(f" ✓ Converted {count} frames") + _convert_framenet_supplementary(converter, download_path, output.parent, verbose) def initialize_datasets( From e0d0ad954ca1dc04dc330fb5f2e3a6f3340841e4 Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Thu, 5 Feb 2026 22:06:36 -0500 Subject: [PATCH 06/11] Adds converter-to-loader round-trip integration tests for all four resources. --- tests/test_integration/__init__.py | 0 .../test_converter_loader_roundtrip.py | 829 ++++++++++++++++++ 2 files changed, 829 insertions(+) create mode 100644 tests/test_integration/__init__.py create mode 100644 tests/test_integration/test_converter_loader_roundtrip.py diff --git a/tests/test_integration/__init__.py b/tests/test_integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_integration/test_converter_loader_roundtrip.py b/tests/test_integration/test_converter_loader_roundtrip.py new file mode 100644 index 0000000..1829a00 --- /dev/null +++ b/tests/test_integration/test_converter_loader_roundtrip.py @@ -0,0 +1,829 @@ +"""Converter-to-loader round-trip integration tests. + +Tests that data survives the full pipeline: raw format → converter → JSONL → loader. +Covers all four resources (WordNet, FrameNet, VerbNet, PropBank) plus contract +and field completeness checks. 
+""" + +import json + +import pytest + +from glazing.framenet.converter import FrameNetConverter +from glazing.framenet.loader import FrameNetLoader +from glazing.propbank.converter import PropBankConverter +from glazing.propbank.loader import PropBankLoader +from glazing.verbnet.converter import VerbNetConverter +from glazing.verbnet.loader import VerbNetLoader +from glazing.wordnet.converter import WordNetConverter +from glazing.wordnet.loader import WordNetLoader + +# ── WordNet ──────────────────────────────────────────────────────────────── + + +WN_LICENSE_HEADER = """\ + 1 This software and database is being provided to you, the LICENSEE, by + 2 Princeton University under the following license. + 3 + 4 + 5 +""" + + +class TestWordNetRoundTrip: + """WordNet converter → JSONL → loader pipeline.""" + + @pytest.fixture + def wordnet_data(self, tmp_path): + """Create a minimal WordNet database and run the full conversion pipeline.""" + wn_dir = tmp_path / "wn" + wn_dir.mkdir() + output_dir = tmp_path / "out" + output_dir.mkdir() + + # data.verb (2 synsets, second has 2 verb frames) + (wn_dir / "data.verb").write_text( + WN_LICENSE_HEADER + + "00001740 29 v 01 breathe 0 002 $ 00001740 v 0000 @ 00002084 v 0000 01 + 02 00 | draw air into and expel out of the lungs\n" + + "00002084 29 v 02 respire 0 breathe 1 001 @ 00001740 v 0000 02 + 01 00 + 02 01 | undergo respiration\n", + encoding="utf-8", + ) + # data.noun (1 synset) + (wn_dir / "data.noun").write_text( + WN_LICENSE_HEADER + + "00002325 03 n 01 entity 0 001 ~ 00002684 n 0000 | something having concrete existence\n", + encoding="utf-8", + ) + # data.adj / data.adv (empty but present) + for name in ("data.adj", "data.adv"): + (wn_dir / name).write_text(WN_LICENSE_HEADER, encoding="utf-8") + + # index.sense (sense_key synset_offset sense_number tag_count) + (wn_dir / "index.sense").write_text( + "breathe%2:29:00:: 00001740 1 25\n" + "respire%2:29:00:: 00002084 1 3\n" + "breathe%2:29:01:: 00002084 2 0\n" + "entity%1:03:00:: 00002325 1 11\n", + encoding="utf-8", + ) + + # verb.Framestext + (wn_dir / "verb.Framestext").write_text( + "1 Something ----s\n2 Somebody ----s\n", + encoding="utf-8", + ) + + # sents.vrb + (wn_dir / "sents.vrb").write_text( + "1 The children %s to the playground\n2 The banks %s the check\n", + encoding="utf-8", + ) + + # cntlist + (wn_dir / "cntlist").write_text( + "25 breathe%2:29:00:: 1\n3 respire%2:29:00:: 1\n11 entity%1:03:00:: 1\n", + encoding="utf-8", + ) + + # verb.exc + (wn_dir / "verb.exc").write_text( + "breathed breathe\nrespired respire\n", + encoding="utf-8", + ) + # Create empty exception files for remaining POS categories + for name in ("noun.exc", "adj.exc", "adv.exc"): + (wn_dir / name).write_text("", encoding="utf-8") + + # Run conversions + converter = WordNetConverter() + stats = converter.convert_wordnet_database(wn_dir, output_dir / "wordnet.jsonl") + sense_count = converter.convert_sense_index(wn_dir, output_dir / "wordnet_senses.jsonl") + exc_count = converter.convert_exceptions(wn_dir, output_dir / "wordnet_exceptions.jsonl") + + loader = WordNetLoader(data_path=output_dir / "wordnet.jsonl") + + return { + "stats": stats, + "sense_count": sense_count, + "exc_count": exc_count, + "loader": loader, + "output_dir": output_dir, + } + + def test_synset_count_preserved(self, wordnet_data): + """Converter counts match loader counts.""" + stats = wordnet_data["stats"] + wn = wordnet_data["loader"] + + assert stats["synsets_verb"] == 2 + assert stats["synsets_noun"] == 1 + assert stats["total_synsets"] 
== 3 + assert len(wn.synsets) == 3 + + def test_word_enrichment(self, wordnet_data): + """Words have tag_count and sense_number from cntlist/index.sense.""" + wn = wordnet_data["loader"] + + # breathe in synset 00001740 should have tag_count=25, sense_number=1 + synset = wn.synsets["00001740"] + breathe_word = synset.words[0] + assert breathe_word.lemma == "breathe" + assert breathe_word.tag_count == 25 + assert breathe_word.sense_number == 1 + + # entity in synset 00002325 should have tag_count=11 + entity_synset = wn.synsets["00002325"] + entity_word = entity_synset.words[0] + assert entity_word.lemma == "entity" + assert entity_word.tag_count == 11 + + def test_verb_frame_templates(self, wordnet_data): + """VerbFrames have template and example_sentence from verb.Framestext/sents.vrb.""" + wn = wordnet_data["loader"] + synset = wn.synsets["00001740"] + + assert synset.frames is not None + assert len(synset.frames) == 1 + + frame = synset.frames[0] + assert frame.frame_number == 2 + assert frame.template == "Somebody ----s" + assert frame.example_sentence == "The banks %s the check" + + def test_pointers_preserved(self, wordnet_data): + """Pointer relations survive the round-trip.""" + wn = wordnet_data["loader"] + synset = wn.synsets["00001740"] + + assert len(synset.pointers) == 2 + symbols = {p.symbol for p in synset.pointers} + assert "$" in symbols + assert "@" in symbols + + def test_lemma_index_builds(self, wordnet_data): + """Lemma index enables word lookups after loading.""" + wn = wordnet_data["loader"] + + assert "breathe" in wn.lemma_index + assert "v" in wn.lemma_index["breathe"] + assert "entity" in wn.lemma_index + assert "n" in wn.lemma_index["entity"] + + def test_sense_index_loads(self, wordnet_data): + """Sense index populated from supplementary wordnet_senses.jsonl.""" + wn = wordnet_data["loader"] + + assert len(wn.sense_index) == 4 + assert "breathe%2:29:00::" in wn.sense_index + sense = wn.sense_index["breathe%2:29:00::"] + assert sense.synset_offset == "00001740" + assert sense.tag_count == 25 + + def test_exceptions_load(self, wordnet_data): + """Morphological exceptions loaded from supplementary wordnet_exceptions.jsonl.""" + wn = wordnet_data["loader"] + + assert "v" in wn.exceptions + assert "breathed" in wn.exceptions["v"] + assert wn.exceptions["v"]["breathed"] == ["breathe"] + + +# ── FrameNet ────────────────────────────────────────────────────────────── + + +class TestFrameNetRoundTrip: + """FrameNet converter → JSONL → loader pipeline.""" + + @pytest.fixture + def framenet_data(self, tmp_path): + """Create a minimal FrameNet dataset and run the full conversion pipeline.""" + fn_root = tmp_path / "framenet" + frames_dir = fn_root / "frame" + lu_dir = fn_root / "lu" + fulltext_dir = fn_root / "fulltext" + output_dir = tmp_path / "output" + + frames_dir.mkdir(parents=True) + lu_dir.mkdir() + fulltext_dir.mkdir() + output_dir.mkdir() + + # Frame XML + (frames_dir / "Giving.xml").write_text( + """\ + + + <def-root>A Donor transfers a Theme to a Recipient.</def-root> + + <def-root>The person that gives.</def-root> + + + <def-root>The object given.</def-root> + + + <def-root>The person receiving.</def-root> + + + Transferring + + + + COD: freely transfer the possession of + + + + + COD: give to a good cause + + + +""", + encoding="utf-8", + ) + + # luIndex.xml + (fn_root / "luIndex.xml").write_text( + """\ + + + + +""", + encoding="utf-8", + ) + + # frRelation.xml + (fn_root / "frRelation.xml").write_text( + """\ + + + + + + + +""", + encoding="utf-8", + ) + + # 
semTypes.xml + (fn_root / "semTypes.xml").write_text( + """\ + + + + A type for physical entities + + +""", + encoding="utf-8", + ) + + # Fulltext XML + (fulltext_dir / "TestDoc.xml").write_text( + """\ + + +
+ + + +
+ + He gave her a book. + + + + + + + +
""", + encoding="utf-8", + ) + + # Run conversions + converter = FrameNetConverter() + frame_count = converter.convert_frames_directory(frames_dir, output_dir / "framenet.jsonl") + semtype_count = converter.convert_semtypes_file( + fn_root / "semTypes.xml", output_dir / "framenet_semtypes.jsonl" + ) + fulltext_count = converter.convert_fulltext_directory( + fulltext_dir, output_dir / "framenet_fulltext.jsonl" + ) + + loader = FrameNetLoader(data_path=output_dir / "framenet.jsonl") + + return { + "loader": loader, + "frame_count": frame_count, + "semtype_count": semtype_count, + "fulltext_count": fulltext_count, + } + + def test_frame_count_preserved(self, framenet_data): + """Converter reports 1 frame; loader reads 1 frame.""" + assert framenet_data["frame_count"] == 1 + frames = framenet_data["loader"].frames + assert len(frames) == 1 + assert frames[0].id == 139 + assert frames[0].name == "Giving" + + def test_frame_elements_preserved(self, framenet_data): + """FE names and core types survive the round trip.""" + frame = framenet_data["loader"].frames[0] + fe_names = {fe.name for fe in frame.frame_elements} + assert fe_names == {"Donor", "Theme", "Recipient"} + for fe in frame.frame_elements: + assert fe.core_type == "Core" + + def test_lexical_units_preserved(self, framenet_data): + """LU names and POS survive the round trip.""" + frame = framenet_data["loader"].frames[0] + lu_names = {lu.name for lu in frame.lexical_units} + assert "give.v" in lu_names + assert "donate.v" in lu_names + for lu in frame.lexical_units: + assert lu.pos == "V" + + def test_frame_relations_populated(self, framenet_data): + """Frame relations from frRelation.xml are attached to the frame.""" + frame = framenet_data["loader"].frames[0] + inherits = [r for r in frame.frame_relations if r.type == "Inherits from"] + assert len(inherits) == 1 + rel = inherits[0] + assert rel.sub_frame_id == 139 + assert rel.super_frame_id == 230 + assert rel.super_frame_name == "Transferring" + assert len(rel.fe_relations) == 1 + assert rel.fe_relations[0].sub_fe_name == "Donor" + assert rel.fe_relations[0].super_fe_name == "Sender" + + def test_semantic_types_load(self, framenet_data): + """Semantic types loaded from supplementary framenet_semtypes.jsonl.""" + sem_types = framenet_data["loader"].load_semantic_types() + assert len(sem_types) == 1 + assert sem_types[0].id == 68 + assert sem_types[0].name == "Physical_entity" + + def test_fulltext_loads(self, framenet_data): + """Fulltext sentences loaded from supplementary framenet_fulltext.jsonl.""" + sentences = framenet_data["loader"].load_fulltext() + assert len(sentences) == 1 + assert sentences[0].id == 100 + assert sentences[0].text == "He gave her a book." + assert len(sentences[0].annotation_sets) == 1 + + +# ── VerbNet ─────────────────────────────────────────────────────────────── + + +VERBNET_XML = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + They lent a bicycle to me. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +""" + + +class TestVerbNetRoundTrip: + """VerbNet converter → JSONL → loader pipeline.""" + + @pytest.fixture + def verbnet_data(self, tmp_path): + """Create VerbNet XML and run the conversion pipeline.""" + vn_dir = tmp_path / "verbnet" + vn_dir.mkdir() + output_dir = tmp_path / "output" + output_dir.mkdir() + + (vn_dir / "give-13.1.xml").write_text(VERBNET_XML, encoding="utf-8") + + converter = VerbNetConverter() + count = converter.convert_verbnet_directory(vn_dir, output_dir / "verbnet.jsonl") + + loader = VerbNetLoader(data_path=output_dir / "verbnet.jsonl") + + return {"count": count, "loader": loader} + + def test_class_count_preserved(self, verbnet_data): + """Converter count matches loader count.""" + assert verbnet_data["count"] == 1 + assert len(verbnet_data["loader"].classes) == 1 + assert "give-13.1" in verbnet_data["loader"].classes + + def test_members_preserved(self, verbnet_data): + """Member names and keys survive the round trip.""" + vc = verbnet_data["loader"].classes["give-13.1"] + member_names = {m.name for m in vc.members} + assert member_names == {"give", "deal", "loan"} + + give = next(m for m in vc.members if m.name == "give") + assert give.verbnet_key == "give#2" + + def test_member_framenet_mappings(self, verbnet_data): + """fn_mapping attribute parsed into framenet_mappings.""" + vc = verbnet_data["loader"].classes["give-13.1"] + + give = next(m for m in vc.members if m.name == "give") + assert len(give.framenet_mappings) == 1 + assert give.framenet_mappings[0].frame_name == "Giving" + + # "None" fn_mapping should result in no mappings + deal = next(m for m in vc.members if m.name == "deal") + assert len(deal.framenet_mappings) == 0 + + def test_member_propbank_mappings(self, verbnet_data): + """grouping attribute parsed into propbank_mappings.""" + vc = verbnet_data["loader"].classes["give-13.1"] + + give = next(m for m in vc.members if m.name == "give") + pb_ids = {xr.target_id for xr in give.propbank_mappings} + assert "give.01" in pb_ids + assert "give.02" in pb_ids + + # empty grouping → no propbank mappings + loan = next(m for m in vc.members if m.name == "loan") + assert len(loan.propbank_mappings) == 0 + + def test_themroles_preserved(self, verbnet_data): + """Thematic roles survive the round trip.""" + vc = verbnet_data["loader"].classes["give-13.1"] + role_types = {r.type for r in vc.themroles} + assert role_types == {"Agent", "Theme", "Recipient"} + + def test_subclass_hierarchy(self, verbnet_data): + """Subclass members accessible through the class hierarchy.""" + vc = verbnet_data["loader"].classes["give-13.1"] + assert len(vc.subclasses) == 1 + + sub = vc.subclasses[0] + assert sub.id == "give-13.1-1" + assert len(sub.members) == 1 + assert sub.members[0].name == "sell" + + def test_member_index_builds(self, verbnet_data): + """Member index allows looking up class by verbnet key.""" + vn = verbnet_data["loader"] + assert "give#2" in vn.member_index + assert vn.member_index["give#2"] == "give-13.1" + + +# ── PropBank ────────────────────────────────────────────────────────────── + + +PROPBANK_XML = """\ + + + + + + + abandon + abandonment + + + + + theme + + + + + + + + + + + + + John abandoned the project. 
+ + abandoned + John + the project + + + + +""" + + +class TestPropBankRoundTrip: + """PropBank converter → JSONL → loader pipeline.""" + + @pytest.fixture + def propbank_data(self, tmp_path): + """Create PropBank XML and run the conversion pipeline.""" + pb_dir = tmp_path / "frames" + pb_dir.mkdir() + output_dir = tmp_path / "output" + output_dir.mkdir() + + (pb_dir / "abandon.xml").write_text(PROPBANK_XML, encoding="utf-8") + + converter = PropBankConverter() + count = converter.convert_framesets_directory(pb_dir, output_dir / "propbank.jsonl") + + loader = PropBankLoader(data_path=output_dir / "propbank.jsonl") + + return {"count": count, "loader": loader} + + def test_frameset_count_preserved(self, propbank_data): + """Converter count matches loader count.""" + assert propbank_data["count"] == 1 + assert len(propbank_data["loader"].framesets) == 1 + assert "abandon" in propbank_data["loader"].framesets + + def test_roles_preserved(self, propbank_data): + """Roles survive the round trip with correct attributes.""" + fs = propbank_data["loader"].framesets["abandon"] + rs = fs.rolesets[0] + assert rs.id == "abandon.01" + assert rs.name == "leave behind" + + role_numbers = {r.n for r in rs.roles} + assert "0" in role_numbers + assert "1" in role_numbers + + role0 = next(r for r in rs.roles if r.n == "0") + assert role0.descr == "abandoner" + + def test_lexlinks_preserved(self, propbank_data): + """Lexical links survive the round trip.""" + rs = propbank_data["loader"].framesets["abandon"].rolesets[0] + + assert len(rs.lexlinks) == 2 + resources = {ll.resource for ll in rs.lexlinks} + assert "FrameNet" in resources + assert "VerbNet" in resources + + fn_link = next(ll for ll in rs.lexlinks if ll.resource == "FrameNet") + assert fn_link.class_name == "Abandonment" + assert fn_link.confidence == pytest.approx(0.8) + + def test_examples_preserved(self, propbank_data): + """Example annotations survive the round trip.""" + rs = propbank_data["loader"].framesets["abandon"].rolesets[0] + assert len(rs.examples) == 1 + + ex = rs.examples[0] + assert ex.name == "typical transitive" + assert "abandoned" in ex.text + + def test_roleset_index_builds(self, propbank_data): + """Roleset index allows looking up frameset by roleset ID.""" + pb = propbank_data["loader"] + assert "abandon.01" in pb.roleset_index + assert pb.roleset_index["abandon.01"] == "abandon" + + +# ── Contract Tests ──────────────────────────────────────────────────────── + + +class TestConverterLoaderContracts: + """Contract tests: all converters produce valid JSONL, all loaders can read it.""" + + def test_all_converters_produce_valid_jsonl(self, tmp_path): + """Every line in every converter output is valid JSON.""" + output_dir = tmp_path / "output" + output_dir.mkdir() + wn_dir = tmp_path / "wn" + wn_dir.mkdir() + + # Minimal WordNet data + (wn_dir / "data.verb").write_text( + WN_LICENSE_HEADER + + "00001740 29 v 01 breathe 0 001 @ 00001740 v 0000 01 + 02 00 | breathe\n", + encoding="utf-8", + ) + for name in ("data.noun", "data.adj", "data.adv"): + (wn_dir / name).write_text(WN_LICENSE_HEADER, encoding="utf-8") + (wn_dir / "index.sense").write_text("breathe%2:29:00:: 00001740 1 0\n", encoding="utf-8") + (wn_dir / "verb.Framestext").write_text("", encoding="utf-8") + (wn_dir / "sents.vrb").write_text("", encoding="utf-8") + (wn_dir / "cntlist").write_text("", encoding="utf-8") + for name in ("verb.exc", "noun.exc", "adj.exc", "adv.exc"): + (wn_dir / name).write_text("", encoding="utf-8") + + converter = WordNetConverter() + 
converter.convert_wordnet_database(wn_dir, output_dir / "wordnet.jsonl") + + # Verify every line is valid JSON + with (output_dir / "wordnet.jsonl").open() as f: + for i, line in enumerate(f): + obj = json.loads(line) + assert isinstance(obj, dict), f"Line {i} is not a JSON object" + + def test_supplementary_files_optional(self, tmp_path): + """Loaders work without supplementary files.""" + output_dir = tmp_path / "output" + output_dir.mkdir() + wn_dir = tmp_path / "wn" + wn_dir.mkdir() + + (wn_dir / "data.verb").write_text( + WN_LICENSE_HEADER + + "00001740 29 v 01 breathe 0 001 @ 00001740 v 0000 01 + 02 00 | breathe\n", + encoding="utf-8", + ) + for name in ("data.noun", "data.adj", "data.adv"): + (wn_dir / name).write_text(WN_LICENSE_HEADER, encoding="utf-8") + (wn_dir / "index.sense").write_text("breathe%2:29:00:: 00001740 1 0\n", encoding="utf-8") + (wn_dir / "verb.Framestext").write_text("", encoding="utf-8") + (wn_dir / "sents.vrb").write_text("", encoding="utf-8") + (wn_dir / "cntlist").write_text("", encoding="utf-8") + for name in ("verb.exc", "noun.exc", "adj.exc", "adv.exc"): + (wn_dir / name).write_text("", encoding="utf-8") + + converter = WordNetConverter() + converter.convert_wordnet_database(wn_dir, output_dir / "wordnet.jsonl") + + # Load without supplementary files + wn = WordNetLoader(data_path=output_dir / "wordnet.jsonl") + assert len(wn.synsets) == 1 + # sense_index and exceptions should be empty but loader shouldn't crash + assert len(wn.sense_index) == 0 + assert len(wn.exceptions) == 0 + + +# ── Field Completeness Tests ────────────────────────────────────────────── + + +class TestFieldCompleteness: + """Verify that key model fields are populated after conversion.""" + + def test_wordnet_field_completeness(self, tmp_path): + """WordNet synsets have all expected fields.""" + wn_dir = tmp_path / "wn" + wn_dir.mkdir() + output = tmp_path / "wordnet.jsonl" + + (wn_dir / "data.verb").write_text( + WN_LICENSE_HEADER + + "00001740 29 v 01 breathe 0 001 @ 00002084 v 0000 01 + 02 00 | draw air\n", + encoding="utf-8", + ) + for name in ("data.noun", "data.adj", "data.adv"): + (wn_dir / name).write_text(WN_LICENSE_HEADER, encoding="utf-8") + (wn_dir / "index.sense").write_text("breathe%2:29:00:: 00001740 1 5\n", encoding="utf-8") + (wn_dir / "verb.Framestext").write_text("2 Somebody ----s\n", encoding="utf-8") + (wn_dir / "sents.vrb").write_text("2 The banks %s\n", encoding="utf-8") + (wn_dir / "cntlist").write_text("5 breathe%2:29:00:: 1\n", encoding="utf-8") + for name in ("verb.exc", "noun.exc", "adj.exc", "adv.exc"): + (wn_dir / name).write_text("", encoding="utf-8") + + converter = WordNetConverter() + converter.convert_wordnet_database(wn_dir, output) + + with output.open() as f: + obj = json.loads(f.readline()) + + # Core fields + assert "offset" in obj + assert "lex_filenum" in obj + assert "ss_type" in obj + assert "words" in obj + assert len(obj["words"]) > 0 + assert "pointers" in obj + assert "gloss" in obj + + # Enriched word fields + word = obj["words"][0] + assert word["lemma"] == "breathe" + assert word["tag_count"] == 5 + assert word["sense_number"] == 1 + + # Verb frame fields + assert "frames" in obj + assert len(obj["frames"]) == 1 + frame = obj["frames"][0] + assert "frame_number" in frame + assert frame["template"] == "Somebody ----s" + assert frame["example_sentence"] == "The banks %s" + + def test_verbnet_field_completeness(self, tmp_path): + """VerbNet members have cross-resource mapping fields populated.""" + vn_dir = tmp_path / "vn" + 
vn_dir.mkdir() + output = tmp_path / "verbnet.jsonl" + + (vn_dir / "give-13.1.xml").write_text(VERBNET_XML, encoding="utf-8") + + converter = VerbNetConverter() + converter.convert_verbnet_directory(vn_dir, output) + + with output.open() as f: + obj = json.loads(f.readline()) + + # Check members + give = next(m for m in obj["members"] if m["name"] == "give") + assert len(give["framenet_mappings"]) == 1 + assert len(give["propbank_mappings"]) == 2 + assert len(give["wordnet_mappings"]) >= 1 + + # Check themroles + assert len(obj["themroles"]) == 3 + + # Check frames + assert len(obj["frames"]) == 1 + + def test_propbank_field_completeness(self, tmp_path): + """PropBank rolesets have all expected fields.""" + pb_dir = tmp_path / "pb" + pb_dir.mkdir() + output = tmp_path / "propbank.jsonl" + + (pb_dir / "abandon.xml").write_text(PROPBANK_XML, encoding="utf-8") + + converter = PropBankConverter() + converter.convert_framesets_directory(pb_dir, output) + + with output.open() as f: + obj = json.loads(f.readline()) + + assert "predicate_lemma" in obj + assert obj["predicate_lemma"] == "abandon" + assert "rolesets" in obj + + rs = obj["rolesets"][0] + assert rs["id"] == "abandon.01" + assert len(rs["roles"]) == 2 + assert len(rs["lexlinks"]) == 2 + assert len(rs["examples"]) == 1 From 6da7cb44fb3770c1a3c5c8570438adb028752be9 Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Fri, 6 Feb 2026 13:06:01 -0500 Subject: [PATCH 07/11] Refines lemma validation to allow uppercase letters, digits at the start, and dots; updates tests accordingly. --- src/glazing/references/models.py | 6 +++--- src/glazing/types.py | 4 +++- src/glazing/wordnet/models.py | 2 +- tests/test_base.py | 7 +++++-- tests/test_references/test_models.py | 2 +- tests/test_types.py | 6 ++++-- tests/test_wordnet/test_models.py | 7 +++++-- 7 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/glazing/references/models.py b/src/glazing/references/models.py index cdb3547..ddb181c 100644 --- a/src/glazing/references/models.py +++ b/src/glazing/references/models.py @@ -57,7 +57,7 @@ from pydantic import BaseModel, Field, field_validator from glazing.propbank.models import LexLink, RoleLink -from glazing.types import DatasetType, MappingSource +from glazing.types import LEMMA_PATTERN, DatasetType, MappingSource from glazing.wordnet.models import Sense, WordNetCrossRef from glazing.wordnet.types import SynsetOffset @@ -584,8 +584,8 @@ def validate_lemma(cls, v: str) -> str: ValueError If lemma format is invalid. """ - if not re.match(r"^[a-z][a-z0-9_\'-]*$", v): - msg = f"Invalid lemma format: {v}" + if not re.match(LEMMA_PATTERN, v): + msg = f"Invalid lemma format: {v!r}" raise ValueError(msg) return v diff --git a/src/glazing/types.py b/src/glazing/types.py index c041ecb..5ad3e00 100644 --- a/src/glazing/types.py +++ b/src/glazing/types.py @@ -161,7 +161,9 @@ VERBNET_KEY_PATTERN = r"^[a-z_-]+#\d+$" # e.g., "give#2" # Name validation patterns -LEMMA_PATTERN = r"^[a-z][a-z0-9_\'-]*$" # Word lemmas +LEMMA_PATTERN = ( + r"^[a-zA-Z0-9][a-zA-Z0-9_\'\-\.\/]*$" # Word lemmas (incl. proper nouns, abbreviations) +) # Color validation for FrameNet HEX_COLOR_PATTERN = r"^#?[0-9A-Fa-f]{6}$" # 6-digit hex color with optional # prefix diff --git a/src/glazing/wordnet/models.py b/src/glazing/wordnet/models.py index 2336d3a..61fba80 100644 --- a/src/glazing/wordnet/models.py +++ b/src/glazing/wordnet/models.py @@ -106,7 +106,7 @@ def validate_lemma(cls, v: str) -> str: If lemma format is invalid. 
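+
+        Examples
+        --------
+        A quick illustration of the relaxed pattern (the lemmas shown are
+        arbitrary examples):
+
+        >>> import re
+        >>> from glazing.types import LEMMA_PATTERN
+        >>> bool(re.match(LEMMA_PATTERN, "Dr."))
+        True
+        >>> bool(re.match(LEMMA_PATTERN, "123abandon"))
+        True
+        >>> bool(re.match(LEMMA_PATTERN, " abandon"))
+        False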
""" if not re.match(LEMMA_PATTERN, v): - msg = f"Invalid lemma format: {v}" + msg = f"Invalid lemma format: {v!r}" raise ValueError(msg) return v diff --git a/tests/test_base.py b/tests/test_base.py index 9ee24a1..486ec46 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -385,11 +385,14 @@ def test_validate_lemma(self): assert validate_lemma("abandon") == "abandon" assert validate_lemma("spray_paint") == "spray_paint" assert validate_lemma("don't") == "don't" + assert validate_lemma("Abandon") == "Abandon" # Uppercase allowed + assert validate_lemma("123abandon") == "123abandon" # Digit start allowed + assert validate_lemma("Dr.") == "Dr." # Dots allowed with pytest.raises(ValueError): - validate_lemma("Abandon") # Uppercase + validate_lemma("") # Empty with pytest.raises(ValueError): - validate_lemma("123abandon") # Starts with number + validate_lemma(" abandon") # Leading space def test_validate_hex_color(self): """Test hex color validation.""" diff --git a/tests/test_references/test_models.py b/tests/test_references/test_models.py index 399639e..eeac360 100644 --- a/tests/test_references/test_models.py +++ b/tests/test_references/test_models.py @@ -455,7 +455,7 @@ def test_invalid_lemma_format(self): """Test invalid lemma format.""" with pytest.raises(ValidationError) as exc_info: UnifiedLemma( - lemma="Give", # Capital letter invalid + lemma=" give", # Leading space invalid pos="v", framenet_lus=[], propbank_rolesets=[], diff --git a/tests/test_types.py b/tests/test_types.py index e573e58..c275e63 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -318,11 +318,13 @@ def test_lemma_pattern(self): assert pattern.match("run_up") assert pattern.match("don't") assert pattern.match("mother-in-law") + assert pattern.match("Give") # Capital letter allowed + assert pattern.match("123run") # Digit start allowed + assert pattern.match("Dr.") # Dots allowed # Invalid lemmas - assert not pattern.match("Give") # Capital letter - assert not pattern.match("123run") # Number start assert not pattern.match("") # Empty string + assert not pattern.match(" give") # Leading space def test_hex_color_pattern(self): """Test hex color pattern.""" diff --git a/tests/test_wordnet/test_models.py b/tests/test_wordnet/test_models.py index 341a271..fe9998e 100644 --- a/tests/test_wordnet/test_models.py +++ b/tests/test_wordnet/test_models.py @@ -34,12 +34,15 @@ def test_word_lemma_validation(self): Word(lemma="dog", lex_id=0) Word(lemma="run_up", lex_id=1) Word(lemma="mother-in-law", lex_id=0) + Word(lemma="Dog", lex_id=0) # Uppercase allowed (proper nouns) + Word(lemma="Dr.", lex_id=0) # Dots allowed (abbreviations) + Word(lemma="123dog", lex_id=0) # Digit start allowed # Invalid lemmas with pytest.raises(ValidationError): - Word(lemma="Dog", lex_id=0) # Capital letter + Word(lemma="", lex_id=0) # Empty with pytest.raises(ValidationError): - Word(lemma="123dog", lex_id=0) # Number start + Word(lemma=" dog", lex_id=0) # Leading space def test_word_lex_id_validation(self): """Test lex_id range validation.""" From 34dc404ad91d3f6fbe25683f6710ca3c75174177 Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Fri, 6 Feb 2026 13:06:27 -0500 Subject: [PATCH 08/11] Adds pypi workflow. 
--- .github/workflows/publish.yml | 52 +++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 .github/workflows/publish.yml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..9851ad1 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,52 @@ +name: Publish to PyPI + +on: + push: + tags: + - "v*" + +env: + PYTHON_VERSION: "3.13" + +jobs: + build: + name: Build Distribution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install build dependencies + run: | + python -m pip install --upgrade pip + pip install build + + - name: Build distribution + run: python -m build + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + + publish: + name: Publish to PyPI + needs: build + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 From 8596c9916ced0d0dcf49c54faecb5b823aaf901d Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Fri, 6 Feb 2026 15:31:57 -0500 Subject: [PATCH 09/11] Bumps version and changes repo location. --- CHANGELOG.md | 38 +++++++++++++++++++++++++++----------- README.md | 36 ++++++++++++++++++------------------ docs/api/index.md | 2 +- docs/citation.md | 12 ++++++------ docs/contributing.md | 4 ++-- docs/index.md | 16 ++++++++-------- docs/installation.md | 4 ++-- mkdocs.yml | 6 +++--- pyproject.toml | 8 ++++---- src/glazing/__version__.py | 2 +- 10 files changed, 72 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05c574a..b6e3a33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,16 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.2.2] - 2026-02-06 + +### Added + +- **PyPI publish workflow** triggered on tag creation using trusted publishers (OIDC) +- **Converter-to-loader round-trip integration tests** for all four resources +- **FrameNet frame relation, LU enrichment, semantic type, and fulltext parsing** in converter and loader +- **Supplementary data conversion** for WordNet senses/exceptions and FrameNet semantic types/fulltext in `initialize.py` + +### Changed + +- **Rewrote WordNet converter and loader** to use enriched single-file JSONL output with supplementary sense and exception files +- **Relaxed lemma validation** to allow uppercase letters, digits at the start, and dots (supporting proper nouns, abbreviations, and numeric prefixes) +- **Moved repository** from `aaronstevenwhite/glazing` to `factslab/glazing` + +### Fixed + +- **VerbNet converter** now populates `framenet_mappings` and `propbank_mappings` from member attributes +- **PropBank converter** now handles AMR-UMR-91 roleset conversion and XML edge cases + ## [0.2.1] - 2025-10-28 ### Fixed -- **FrameNet lexical units now properly loaded during conversion** - - Lexical units are now parsed from `luIndex.xml` during frame conversion - - All frames now include their associated lexical units with complete metadata - - Fixes critical data completeness issue where `frame.lexical_units` was always empty - - Enables querying frames by lexical unit name via the frame index - - Approximately 13,500 lexical units now correctly associated 
with their frames +- **FrameNet converter** now properly loads lexical units from `luIndex.xml` during frame conversion, fixing a critical issue where `frame.lexical_units` was always empty (~13,500 LUs now correctly associated) ## [0.2.0] - 2025-09-30 @@ -197,8 +212,9 @@ Initial release of `glazing`, a package containing unified data models and inter - `tqdm >= 4.60.0` (progress bars) - `rich >= 13.0.0` (CLI formatting) -[Unreleased]: https://github.com/aaronstevenwhite/glazing/compare/v0.2.1...HEAD -[0.2.1]: https://github.com/aaronstevenwhite/glazing/releases/tag/v0.2.1 -[0.2.0]: https://github.com/aaronstevenwhite/glazing/releases/tag/v0.2.0 -[0.1.1]: https://github.com/aaronstevenwhite/glazing/releases/tag/v0.1.1 -[0.1.0]: https://github.com/aaronstevenwhite/glazing/releases/tag/v0.1.0 +[Unreleased]: https://github.com/factslab/glazing/compare/v0.2.2...HEAD +[0.2.2]: https://github.com/factslab/glazing/releases/tag/v0.2.2 +[0.2.1]: https://github.com/factslab/glazing/releases/tag/v0.2.1 +[0.2.0]: https://github.com/factslab/glazing/releases/tag/v0.2.0 +[0.1.1]: https://github.com/factslab/glazing/releases/tag/v0.1.1 +[0.1.0]: https://github.com/factslab/glazing/releases/tag/v0.1.0 diff --git a/README.md b/README.md index 5f93af7..1f1542b 100644 --- a/README.md +++ b/README.md @@ -2,23 +2,23 @@ [![PyPI version](https://img.shields.io/pypi/v/glazing)](https://pypi.org/project/glazing/) [![Python versions](https://img.shields.io/pypi/pyversions/glazing)](https://pypi.org/project/glazing/) -[![CI](https://github.com/aaronstevenwhite/glazing/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/aaronstevenwhite/glazing/actions/workflows/ci.yml) +[![CI](https://github.com/factslab/glazing/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/factslab/glazing/actions/workflows/ci.yml) [![Documentation](https://readthedocs.org/projects/glazing/badge/?version=latest)](https://glazing.readthedocs.io/en/latest/?badge=latest) -[![License](https://img.shields.io/pypi/l/glazing)](https://github.com/aaronstevenwhite/glazing/blob/main/LICENSE) +[![License](https://img.shields.io/pypi/l/glazing)](https://github.com/factslab/glazing/blob/main/LICENSE) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.17467082.svg)](https://doi.org/10.5281/zenodo.17467082) Unified data models and interfaces for syntactic and semantic frame ontologies. 
## Features -- 🚀 **One-command setup**: `glazing init` downloads and prepares all datasets -- 📦 **Type-safe models**: Pydantic v2 validation for all data structures -- 🔍 **Unified search**: Query across all datasets with consistent API -- 🔗 **Cross-references**: Automatic mapping between resources with confidence scores -- 🎯 **Fuzzy search**: Find data with typos, spelling variants, and inconsistencies -- 🐳 **Docker support**: Use via Docker without local installation -- 💾 **Efficient storage**: JSON Lines format with streaming support -- 🐍 **Modern Python**: Full type hints, Python 3.13+ support +- **One-command setup**: `glazing init` downloads and prepares all datasets +- **Type-safe models**: Pydantic v2 validation for all data structures +- **Unified search**: Query across all datasets with consistent API +- **Cross-references**: Automatic mapping between resources with confidence scores +- **Fuzzy search**: Find data with typos, spelling variants, and inconsistencies +- **Docker support**: Use via Docker without local installation +- **Efficient storage**: JSON Lines format with streaming support +- **Modern Python**: Full type hints, Python 3.13+ support ## Installation @@ -34,7 +34,7 @@ Build and run Glazing in a containerized environment: ```bash # Build the image -git clone https://github.com/aaronstevenwhite/glazing.git +git clone https://github.com/factslab/glazing.git cd glazing docker build -t glazing:latest . @@ -167,11 +167,11 @@ Full documentation available at [https://glazing.readthedocs.io](https://glazing ## Contributing -We welcome contributions! See [CONTRIBUTING.md](https://github.com/aaronstevenwhite/glazing/blob/main/CONTRIBUTING.md) for guidelines. +We welcome contributions! See [CONTRIBUTING.md](https://github.com/factslab/glazing/blob/main/CONTRIBUTING.md) for guidelines. ```bash # Development setup -git clone https://github.com/aaronstevenwhite/glazing +git clone https://github.com/factslab/glazing cd glazing pip install -e ".[dev]" ``` @@ -185,22 +185,22 @@ If you use Glazing in your research, please cite: author = {White, Aaron Steven}, title = {Glazing: Unified Data Models and Interfaces for Syntactic and Semantic Frame Ontologies}, year = {2025}, - url = {https://github.com/aaronstevenwhite/glazing}, + url = {https://github.com/factslab/glazing}, doi = {10.5281/zenodo.17467082} } ``` ## License -This package is licensed under an MIT License. See [LICENSE](https://github.com/aaronstevenwhite/glazing/blob/main/LICENSE) file for details. +This package is licensed under an MIT License. See [LICENSE](https://github.com/factslab/glazing/blob/main/LICENSE) file for details. ## Links -- [GitHub Repository](https://github.com/aaronstevenwhite/glazing) +- [GitHub Repository](https://github.com/factslab/glazing) - [PyPI Package](https://pypi.org/project/glazing/) - [Documentation](https://glazing.readthedocs.io) -- [Issue Tracker](https://github.com/aaronstevenwhite/glazing/issues) +- [Issue Tracker](https://github.com/factslab/glazing/issues) ## Acknowledgments -This project was funded by a [National Science Foundation](https://www.nsf.gov/) ([BCS-2040831](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2040831)) and builds upon the foundational work of the FrameNet, PropBank, VerbNet, and WordNet teams. +This project was funded by a [National Science Foundation](https://www.nsf.gov/) ([BCS-2040831](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2040831)) and builds upon the foundational work of the FrameNet, PropBank, VerbNet, and WordNet teams. 
It was architected and implemented with the help of Claude Code. diff --git a/docs/api/index.md b/docs/api/index.md index 86288bf..b42cbcd 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -118,7 +118,7 @@ except ValidationError as e: ## Version Compatibility -This documentation covers Glazing version 0.2.1. Check your installed version: +This documentation covers Glazing version 0.2.2. Check your installed version: ```python import glazing diff --git a/docs/citation.md b/docs/citation.md index 051dcfb..c05d7a1 100644 --- a/docs/citation.md +++ b/docs/citation.md @@ -11,23 +11,23 @@ If you use Glazing in your research, please cite our work. author = {White, Aaron Steven}, title = {Glazing: Unified Data Models and Interfaces for Syntactic and Semantic Frame Ontologies}, year = {2025}, - url = {https://github.com/aaronstevenwhite/glazing}, - version = {0.2.1}, + url = {https://github.com/factslab/glazing}, + version = {0.2.2}, doi = {10.5281/zenodo.17467082} } ``` ### APA -White, A. S. (2025). *Glazing: Unified Data Models and Interfaces for Syntactic and Semantic Frame Ontologies* (Version 0.2.1) [Computer software]. https://github.com/aaronstevenwhite/glazing +White, A. S. (2025). *Glazing: Unified Data Models and Interfaces for Syntactic and Semantic Frame Ontologies* (Version 0.2.2) [Computer software]. https://github.com/factslab/glazing ### Chicago -White, Aaron Steven. 2025. *Glazing: Unified Data Models and Interfaces for Syntactic and Semantic Frame Ontologies*. Version 0.2.1. https://github.com/aaronstevenwhite/glazing. +White, Aaron Steven. 2025. *Glazing: Unified Data Models and Interfaces for Syntactic and Semantic Frame Ontologies*. Version 0.2.2. https://github.com/factslab/glazing. ### MLA -White, Aaron Steven. *Glazing: Unified Data Models and Interfaces for Syntactic and Semantic Frame Ontologies*. Version 0.2.1, 2025, https://github.com/aaronstevenwhite/glazing. +White, Aaron Steven. *Glazing: Unified Data Models and Interfaces for Syntactic and Semantic Frame Ontologies*. Version 0.2.2, 2025, https://github.com/factslab/glazing. ## Citing Datasets @@ -65,4 +65,4 @@ This project was funded by a [National Science Foundation](https://www.nsf.gov/) For questions about citing Glazing, contact: - Aaron Steven White: aaron.white@rochester.edu -- GitHub Issues: https://github.com/aaronstevenwhite/glazing/issues +- GitHub Issues: https://github.com/factslab/glazing/issues diff --git a/docs/contributing.md b/docs/contributing.md index 7403217..02c9b40 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -23,7 +23,7 @@ cd glazing 3. Add the upstream repository: ```bash -git remote add upstream https://github.com/aaronstevenwhite/glazing.git +git remote add upstream https://github.com/factslab/glazing.git ``` ### Development Setup @@ -245,7 +245,7 @@ Check the issue tracker for `enhancement` labels. 
Feel free to discuss implement Contributors are recognized in: -- The project's [CHANGELOG.md](https://github.com/aaronstevenwhite/glazing/blob/main/CHANGELOG.md) +- The project's [CHANGELOG.md](https://github.com/factslab/glazing/blob/main/CHANGELOG.md) - GitHub's contributor graph - Special mentions for significant contributions diff --git a/docs/index.md b/docs/index.md index 51cc33f..f4a55f9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,8 +2,8 @@ [![PyPI version](https://img.shields.io/pypi/v/glazing)](https://pypi.org/project/glazing/) [![Python versions](https://img.shields.io/pypi/pyversions/glazing)](https://pypi.org/project/glazing/) -[![License](https://img.shields.io/pypi/l/glazing)](https://github.com/aaronstevenwhite/glazing/blob/main/LICENSE) -[![CI](https://github.com/aaronstevenwhite/glazing/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/aaronstevenwhite/glazing/actions/workflows/ci.yml) +[![License](https://img.shields.io/pypi/l/glazing)](https://github.com/factslab/glazing/blob/main/LICENSE) +[![CI](https://github.com/factslab/glazing/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/factslab/glazing/actions/workflows/ci.yml) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.17467082.svg)](https://doi.org/10.5281/zenodo.17467082) Unified data models and interfaces for syntactic and semantic frame ontologies. @@ -74,14 +74,14 @@ Glazing is actively maintained and welcomes contributions. The project follows s ## Links -- [GitHub Repository](https://github.com/aaronstevenwhite/glazing) +- [GitHub Repository](https://github.com/factslab/glazing) - [PyPI Package](https://pypi.org/project/glazing/) -- [Issue Tracker](https://github.com/aaronstevenwhite/glazing/issues) -- [Changelog](https://github.com/aaronstevenwhite/glazing/blob/main/CHANGELOG.md) +- [Issue Tracker](https://github.com/factslab/glazing/issues) +- [Changelog](https://github.com/factslab/glazing/blob/main/CHANGELOG.md) ## License -This package is licensed under an MIT License. See [LICENSE](https://github.com/aaronstevenwhite/glazing/blob/main/LICENSE) file for details. +This package is licensed under an MIT License. See [LICENSE](https://github.com/factslab/glazing/blob/main/LICENSE) file for details. ## Citation @@ -92,8 +92,8 @@ If you use Glazing in your research, please cite: author = {White, Aaron Steven}, title = {Glazing: Unified Data Models and Interfaces for Syntactic and Semantic Frame Ontologies}, year = {2025}, - url = {https://github.com/aaronstevenwhite/glazing}, - version = {0.2.1}, + url = {https://github.com/factslab/glazing}, + version = {0.2.2}, doi = {10.5281/zenodo.17467082} } ``` diff --git a/docs/installation.md b/docs/installation.md index bd6a53f..5e0a898 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -22,7 +22,7 @@ pip install glazing To install the latest development version: ```bash -git clone https://github.com/aaronstevenwhite/glazing.git +git clone https://github.com/factslab/glazing.git cd glazing pip install -e . ``` @@ -149,7 +149,7 @@ Glazing provides a Docker image for containerized usage, allowing you to use the Clone the repository and build the image: ```bash -git clone https://github.com/aaronstevenwhite/glazing.git +git clone https://github.com/factslab/glazing.git cd glazing docker build -t glazing:latest . 
``` diff --git a/mkdocs.yml b/mkdocs.yml index d9f3d2f..ce2d0b8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,8 +2,8 @@ site_name: Glazing Documentation site_description: Unified data models and interfaces for syntactic and semantic frame ontologies site_author: Aaron Steven White site_url: https://glazing.readthedocs.io -repo_name: aaronstevenwhite/glazing -repo_url: https://github.com/aaronstevenwhite/glazing +repo_name: factslab/glazing +repo_url: https://github.com/factslab/glazing edit_uri: edit/main/docs/ theme: @@ -127,7 +127,7 @@ markdown_extensions: extra: social: - icon: fontawesome/brands/github - link: https://github.com/aaronstevenwhite + link: https://github.com/factslab - icon: fontawesome/brands/python link: https://pypi.org/project/glazing/ version: diff --git a/pyproject.toml b/pyproject.toml index 4af2bf1..d8411ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "glazing" -version = "0.2.1" +version = "0.2.2" description = "Unified data models and interfaces for syntactic and semantic frame ontologies" readme = "README.md" requires-python = ">=3.13" @@ -60,9 +60,9 @@ docs = [ glazing = "glazing.cli:cli" [project.urls] -"Homepage" = "https://github.com/aaronstevenwhite/glazing" -"Bug Reports" = "https://github.com/aaronstevenwhite/glazing/issues" -"Source" = "https://github.com/aaronstevenwhite/glazing" +"Homepage" = "https://github.com/factslab/glazing" +"Bug Reports" = "https://github.com/factslab/glazing/issues" +"Source" = "https://github.com/factslab/glazing" "Documentation" = "https://glazing.readthedocs.io" [tool.setuptools.packages.find] diff --git a/src/glazing/__version__.py b/src/glazing/__version__.py index 7aa23f1..051b025 100644 --- a/src/glazing/__version__.py +++ b/src/glazing/__version__.py @@ -1,4 +1,4 @@ """Version information for the glazing package.""" -__version__ = "0.2.1" +__version__ = "0.2.2" __version_info__ = tuple(int(i) for i in __version__.split(".")) From 630147e59ffdced37cb808a303c0b32e3d6ca78b Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Fri, 6 Feb 2026 15:41:52 -0500 Subject: [PATCH 10/11] Adds PR and issue templates. --- .github/ISSUE_TEMPLATE/bug_report.md | 36 +++++++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 27 +++++++++++++++++ .github/PULL_REQUEST_TEMPLATE.md | 30 +++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..ef79b37 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,36 @@ +--- +name: Bug Report +about: Report a bug to help improve Glazing +title: "" +labels: bug +assignees: "" +--- + +## Description + + + +## Steps to reproduce + +1. +2. +3. 
+ +## Expected behavior + + + +## Actual behavior + + + +## Environment + +- Glazing version: +- Python version: +- OS: +- Installation method: + +## Additional context + + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bde95eb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,27 @@ +--- +name: Feature Request +about: Suggest a new feature or improvement +title: "" +labels: enhancement +assignees: "" +--- + +## Description + + + +## Motivation + + + +## Proposed solution + + + +## Alternatives considered + + + +## Additional context + + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..0d487a8 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,30 @@ +## Description + + + +## Type of Change + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] Documentation update + +## Key Changes + + + +- + +## Impact + + + +Closes # + +## Testing + +- [ ] All tests pass (`pytest`) +- [ ] New tests added (if applicable) +- [ ] Type checking passes (`mypy --strict src/`) +- [ ] Linting passes (`ruff check`) +- [ ] Formatting passes (`ruff format`) From 32819d5a043877d9936b810d7782100097e7b8bb Mon Sep 17 00:00:00 2001 From: Aaron Steven White Date: Fri, 6 Feb 2026 15:52:09 -0500 Subject: [PATCH 11/11] Fixes ruff PLW0108 and PLC0207 lint errors. --- src/glazing/cli/search.py | 20 ++++++++++---------- src/glazing/references/mapper.py | 4 ++-- src/glazing/search.py | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/glazing/cli/search.py b/src/glazing/cli/search.py index 96244ac..a178065 100644 --- a/src/glazing/cli/search.py +++ b/src/glazing/cli/search.py @@ -175,7 +175,7 @@ def search() -> None: @click.option( "--data-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - default=lambda: get_default_data_path(), + default=get_default_data_path, help="Directory containing converted JSON Lines files " "(default: ~/.local/share/glazing/converted).", ) @@ -318,7 +318,7 @@ def search_query( # noqa: PLR0913 @click.option( "--data-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - default=lambda: get_default_data_path(), + default=get_default_data_path, help="Directory containing converted JSON Lines files " "(default: ~/.local/share/glazing/converted).", ) @@ -371,7 +371,7 @@ def get_entity( @click.option( "--data-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - default=lambda: get_default_data_path(), + default=get_default_data_path, help="Directory containing converted JSON Lines files " "(default: ~/.local/share/glazing/converted).", ) @@ -456,7 +456,7 @@ def search_role( @click.option( "--data-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - default=lambda: get_default_data_path(), + default=get_default_data_path, help="Directory containing converted JSON Lines files " "(default: ~/.local/share/glazing/converted).", ) @@ -518,7 +518,7 @@ def find_cross_ref( @click.option( "--data-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - default=lambda: get_default_data_path(), + default=get_default_data_path, help="Directory containing converted JSON Lines files.", ) @click.option( @@ -579,7 +579,7 @@ def search_fuzzy( 
@click.option( "--data-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - default=lambda: get_default_data_path(), + default=get_default_data_path, help="Directory containing converted JSON Lines files.", ) @click.option("--optional", is_flag=True, help="Find optional roles.") @@ -643,7 +643,7 @@ def search_roles( @click.option( "--data-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - default=lambda: get_default_data_path(), + default=get_default_data_path, help="Directory containing converted JSON Lines files.", ) @click.option( @@ -722,7 +722,7 @@ def search_args( # noqa: PLR0913 @click.option( "--data-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - default=lambda: get_default_data_path(), + default=get_default_data_path, help="Directory containing converted JSON Lines files.", ) @click.option( @@ -786,7 +786,7 @@ def search_relations( @click.option( "--data-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - default=lambda: get_default_data_path(), + default=get_default_data_path, help="Directory containing converted JSON Lines files.", ) @click.option( @@ -874,7 +874,7 @@ def search_syntax( @click.option( "--data-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - default=lambda: get_default_data_path(), + default=get_default_data_path, help="Directory containing converted JSON Lines files.", ) @click.option( diff --git a/src/glazing/references/mapper.py b/src/glazing/references/mapper.py index 10e1b95..b97ef33 100644 --- a/src/glazing/references/mapper.py +++ b/src/glazing/references/mapper.py @@ -919,8 +919,8 @@ def _generate_roleset_name(self, roleset_id: str) -> str: Descriptive name. """ if "." in roleset_id: - lemma_part = roleset_id.split(".")[0] - return f"{lemma_part} (sense {roleset_id.split('.')[-1]})" + lemma_part, sense_part = roleset_id.split(".", maxsplit=1) + return f"{lemma_part} (sense {sense_part})" return f"Roleset {roleset_id}" def _build_verbnet_member_refs( diff --git a/src/glazing/search.py b/src/glazing/search.py index 75ee365..5f4e849 100644 --- a/src/glazing/search.py +++ b/src/glazing/search.py @@ -876,7 +876,7 @@ def _propbank_to_verbnet_refs(self, entity_id: str) -> list[dict[str, str | floa if not self.propbank: return references - pb_frameset = self.propbank.by_lemma(entity_id.split(".")[0]) + pb_frameset = self.propbank.by_lemma(entity_id.split(".", maxsplit=1)[0]) if not pb_frameset: return references @@ -1051,7 +1051,7 @@ def _propbank_to_framenet_refs(self, entity_id: str) -> list[dict[str, str | flo if not self.propbank: return references - pb_frameset = self.propbank.by_lemma(entity_id.split(".")[0]) + pb_frameset = self.propbank.by_lemma(entity_id.split(".", maxsplit=1)[0]) if not pb_frameset: return references
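
---

The two ruff rules addressed in this final patch are narrow but easy to misread in diff form. PLW0108 (unnecessary lambda) fires when a lambda does nothing but forward to another callable, as in `default=lambda: get_default_data_path()`; PLC0207 (use-maxsplit-arg) fires when only part of a `str.split()` result is used without bounding the split. Below is a minimal standalone sketch of both patterns; the body of `get_default_data_path` and the `split_roleset` helper are illustrative reconstructions based on the help text and roleset format visible in the diff, not the package's actual implementations.

```python
"""Sketch of the patterns behind the PLW0108 and PLC0207 fixes (illustrative only)."""

from pathlib import Path


def get_default_data_path() -> Path:
    """Stand-in for the CLI default helper; path taken from the option help text."""
    return Path.home() / ".local" / "share" / "glazing" / "converted"


# PLW0108 (unnecessary lambda): `lambda: get_default_data_path()` only forwards
# the call, so the callable itself can be handed to any API that accepts a
# zero-argument factory. It is still evaluated lazily, at call time.
lazy_default = get_default_data_path


def split_roleset(roleset_id: str) -> tuple[str, str]:
    """Split a roleset ID such as 'abandon.01' into ('abandon', '01').

    PLC0207 (use-maxsplit-arg): when only the text on either side of the first
    separator is needed, bounding the split stops Python from splitting the
    remainder and yields both halves in a single call.
    """
    if "." not in roleset_id:
        return roleset_id, ""
    lemma, sense = roleset_id.split(".", maxsplit=1)
    return lemma, sense


if __name__ == "__main__":
    print(lazy_default())               # resolved only when invoked
    print(split_roleset("abandon.01"))  # -> ('abandon', '01')
```

Passing the callable directly preserves lazy evaluation of the option default (click accepts a callable and invokes it when the value is needed), which is presumably why the original code wrapped the helper in a lambda rather than calling it eagerly at import time.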