|
| 1 | +""" |
| 2 | +Unit tests for schema format I/O roundtrip validation. |
| 3 | +
|
| 4 | +Tests that schemas can be loaded, saved in all 4 formats (XML, MediaWiki, TSV, JSON), |
| 5 | +and reloaded with perfect fidelity for both standard and library schemas. |
| 6 | +""" |
| 7 | + |
| 8 | +import unittest |
| 9 | +import os |
| 10 | +import tempfile |
| 11 | +import shutil |
| 12 | +from hed.schema import load_schema_version, load_schema |
| 13 | + |
| 14 | + |
class TestSchemaFormatRoundtrip(unittest.TestCase):
    """Test that all 4 schema formats (XML, MediaWiki, TSV, JSON) roundtrip correctly.

    Every test writes its files beneath one temporary directory that is created
    once for the class and removed when the class finishes. The save/load/compare
    steps shared by the tests live in private helpers so each test only states
    which schema it exercises and how it is saved.
    """

    @classmethod
    def setUpClass(cls):
        """Create temporary directory for test files."""
        cls.temp_dir = tempfile.mkdtemp(prefix="hed_schema_test_")

    @classmethod
    def tearDownClass(cls):
        """Clean up temporary directory."""
        if os.path.exists(cls.temp_dir):
            shutil.rmtree(cls.temp_dir)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _format_paths(self, basename):
        """
        Build the output location for each of the 4 formats.

        Parameters:
            basename: File stem shared by all outputs.

        Returns:
            dict: Format name -> path, in the order XML, MediaWiki, TSV, JSON.
                The TSV entry is a directory (under "tsv"), the others are files.
        """
        return {
            "XML": os.path.join(self.temp_dir, f"{basename}.xml"),
            "MediaWiki": os.path.join(self.temp_dir, f"{basename}.mediawiki"),
            "TSV": os.path.join(self.temp_dir, "tsv", basename),
            "JSON": os.path.join(self.temp_dir, f"{basename}.json"),
        }

    def _save_all_formats(self, schema, basename, save_merged=None):
        """
        Save a schema in all 4 formats.

        Parameters:
            schema: The HedSchema to save
            basename: File stem shared by all outputs
            save_merged: Passed through to every save call; when None the
                argument is omitted so each writer uses its own default.

        Returns:
            dict: Format name -> path, as produced by _format_paths.
        """
        paths = self._format_paths(basename)
        # Only forward save_merged when the caller chose a value, so tests that
        # rely on the writers' defaults keep doing so.
        options = {} if save_merged is None else {"save_merged": save_merged}
        schema.save_as_xml(paths["XML"], **options)
        schema.save_as_mediawiki(paths["MediaWiki"], **options)
        schema.save_as_dataframes(paths["TSV"], **options)
        schema.save_as_json(paths["JSON"], **options)
        return paths

    @staticmethod
    def _load_all_formats(paths):
        """Load a schema from each path; returns format name -> loaded schema."""
        return {format_name: load_schema(path) for format_name, path in paths.items()}

    def _assert_formats_agree(self, schemas, context):
        """
        Assert that every pair of loaded schemas compares equal.

        Parameters:
            schemas: dict of format name -> loaded schema
            context: Short description appended to the failure message
        """
        format_names = list(schemas)
        for index, format1 in enumerate(format_names):
            for format2 in format_names[index + 1 :]:
                # subTest lets a runner that supports it report every
                # mismatching pair rather than only the first one.
                with self.subTest(format1=format1, format2=format2):
                    self.assertEqual(
                        schemas[format1],
                        schemas[format2],
                        f"{format1} and {format2} formats produced different schemas for {context}",
                    )

    def _check_format_compatibility(self, version, basename, context, save_merged=None):
        """
        Load a schema version, save it in all formats, reload and compare pairwise.

        Parameters:
            version: Version string passed to load_schema_version
            basename: File stem for the saved files
            context: Short description appended to failure messages
            save_merged: See _save_all_formats
        """
        schema = load_schema_version(version)
        paths = self._save_all_formats(schema, basename, save_merged=save_merged)
        self._assert_formats_agree(self._load_all_formats(paths), context)

    def _test_format_roundtrip(self, schema, schema_name, save_merged=True):
        """
        Test that a schema can be saved and reloaded in all 4 formats with perfect fidelity.

        Parameters:
            schema: The HedSchema to test
            schema_name: Base name for saved files
            save_merged: Whether to save in merged format (for library schemas)
        """
        basename = f"{schema_name}{'_merged' if save_merged else '_unmerged'}"
        paths = self._save_all_formats(schema, basename, save_merged=save_merged)

        # Verify files were created. TSV output is a directory, so it is
        # checked through the Tag table inside it.
        for format_name in ("XML", "MediaWiki", "JSON"):
            path = paths[format_name]
            self.assertTrue(os.path.exists(path), f"{format_name} file not created: {path}")
        tsv_tag_file = os.path.join(paths["TSV"], f"{basename}_Tag.tsv")
        self.assertTrue(os.path.exists(tsv_tag_file), f"TSV Tag file not created: {tsv_tag_file}")

        schemas = self._load_all_formats(paths)

        # Compare all schemas to original
        for format_name, reloaded in schemas.items():
            self.assertEqual(
                schema, reloaded, f"{format_name} roundtrip failed for {schema_name} (save_merged={save_merged})"
            )

        # Compare the other formats to the XML result
        for format_name in ("MediaWiki", "TSV", "JSON"):
            self.assertEqual(
                schemas["XML"],
                schemas[format_name],
                f"XML vs {format_name} mismatch for {schema_name} (save_merged={save_merged})",
            )

    # ------------------------------------------------------------------
    # Roundtrip tests
    # ------------------------------------------------------------------
    def test_standard_schema_8_4_0(self):
        """Test HED 8.4.0 standard schema roundtrip in all formats."""
        schema = load_schema_version("8.4.0")
        self.assertIsNotNone(schema, "Failed to load HED 8.4.0 schema")
        self.assertEqual(schema.version_number, "8.4.0")
        self.assertEqual(schema.library, "")

        # Test with standard schema (save_merged not applicable but should work)
        self._test_format_roundtrip(schema, "HED8.4.0", save_merged=True)

    def test_library_schema_lang_merged(self):
        """Test lang 1.1.0 library schema roundtrip in merged format."""
        schema = load_schema_version("lang_1.1.0")
        self.assertIsNotNone(schema, "Failed to load lang 1.1.0 schema")
        self.assertEqual(schema.library, "lang")
        self.assertEqual(schema.version_number, "1.1.0")
        self.assertEqual(schema.with_standard, "8.4.0")

        # Test merged format (includes standard schema tags)
        self._test_format_roundtrip(schema, "lang_1.1.0", save_merged=True)

    def test_library_schema_lang_unmerged(self):
        """Test lang 1.1.0 library schema roundtrip in unmerged format."""
        schema = load_schema_version("lang_1.1.0")
        self.assertIsNotNone(schema, "Failed to load lang 1.1.0 schema")

        # Test unmerged format (library tags only)
        self._test_format_roundtrip(schema, "lang_1.1.0", save_merged=False)

    # ------------------------------------------------------------------
    # Cross-format compatibility tests
    # ------------------------------------------------------------------
    def test_format_compatibility_standard(self):
        """Test that all formats produce identical schemas for standard schema."""
        # save_merged is left unset so every writer uses its default.
        self._check_format_compatibility("8.4.0", "compat_std", "standard schema")

    def test_format_compatibility_library_merged(self):
        """Test that all formats produce identical schemas for library schema (merged)."""
        self._check_format_compatibility("lang_1.1.0", "compat_lib_merged", "library (merged)", save_merged=True)

    def test_format_compatibility_library_unmerged(self):
        """Test that all formats produce identical schemas for library schema (unmerged)."""
        self._check_format_compatibility("lang_1.1.0", "compat_lib_unmerged", "library (unmerged)", save_merged=False)

    # ------------------------------------------------------------------
    # JSON-specific tests
    # ------------------------------------------------------------------
    def test_json_specific_features(self):
        """Test JSON-specific features like multi-value attributes and boolean preservation."""
        schema = load_schema_version("lang_1.1.0")

        # Save as JSON
        json_path = os.path.join(self.temp_dir, "json_features.json")
        schema.save_as_json(json_path, save_merged=True)

        # Reload and verify
        schema_from_json = load_schema(json_path)
        self.assertEqual(schema, schema_from_json, "JSON roundtrip failed")

        # Verify a specific tag's attributes survive the roundtrip.
        # NOTE(review): this check is skipped silently when the tag is absent
        # from the loaded schema -- confirm the tag name against lang 1.1.0.
        tag_name = "Language-item-property"
        if tag_name in schema.tags:
            original_tag = schema.tags[tag_name]
            reloaded_tag = schema_from_json.tags[tag_name]

            self.assertEqual(
                original_tag.attributes, reloaded_tag.attributes, "Tag attributes don't match after JSON roundtrip"
            )

    def test_library_schema_header_attributes(self):
        """Test that library schema header attributes are preserved correctly."""
        schema = load_schema_version("lang_1.1.0")

        # Test merged
        json_merged = os.path.join(self.temp_dir, "header_merged.json")
        schema.save_as_json(json_merged, save_merged=True)
        schema_merged = load_schema(json_merged)

        self.assertEqual(schema.library, schema_merged.library)
        self.assertEqual(schema.version_number, schema_merged.version_number)
        self.assertEqual(schema.with_standard, schema_merged.with_standard)

        # Test unmerged
        json_unmerged = os.path.join(self.temp_dir, "header_unmerged.json")
        schema.save_as_json(json_unmerged, save_merged=False)
        schema_unmerged = load_schema(json_unmerged)

        self.assertEqual(schema.library, schema_unmerged.library)
        # version_number is deliberately not compared for the unmerged save:
        # it might differ (without library prefix).
        self.assertEqual(schema.with_standard, schema_unmerged.with_standard)
| 256 | + |
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
0 commit comments