Patterns for ensuring prior checkpoint functionality continues to work.
Regression testing ensures that:
- Features from prior checkpoints still work
- New changes don't break existing functionality
- Tests are reused efficiently across checkpoints
By default, prior checkpoint tests become regression tests.
# config.yaml
checkpoints:
checkpoint_1:
version: 1
order: 1
state: Core Tests
checkpoint_2:
version: 1
order: 2
state: Core Tests
include_prior_tests: true # Default: true
checkpoint_3:
version: 1
order: 3
state: Core Tests
include_prior_tests: trueWhen evaluating checkpoint_2:
test_checkpoint_1.py→ REGRESSION teststest_checkpoint_2.py→ CORE/FUNCTIONALITY/ERROR tests
When evaluating checkpoint_3:
test_checkpoint_1.py→ REGRESSION teststest_checkpoint_2.py→ REGRESSION teststest_checkpoint_3.py→ CORE/FUNCTIONALITY/ERROR tests
Set include_prior_tests: false to skip prior tests:
checkpoints:
checkpoint_2:
version: 1
order: 2
include_prior_tests: false # Only run checkpoint_2 testsUse this when:
- Prior tests are incompatible with new API
- Complete rewrite of functionality
- Performance testing (fewer tests)
Mark tests as regression explicitly:
@pytest.mark.regression
def test_legacy_format(entrypoint_argv):
"""Regression: Old input format still works."""
# Format from checkpoint_1 that should still work
result = run_command(entrypoint_argv, {"old_key": "value"})
assert result.returncode == 0Use the checkpoint_name fixture for conditional logic:
def test_basic_feature(entrypoint_argv, checkpoint_name):
"""Test basic feature, handles variations."""
result = run_command(entrypoint_argv, {"data": "test"})
output = json.loads(result.stdout)
# Basic assertion works for all checkpoints
assert output["status"] == "success"
# New field added in checkpoint_2
if checkpoint_name in {"checkpoint_2", "checkpoint_3"}:
assert "version" in outputdef test_output_format(entrypoint_argv, checkpoint_name):
"""Handle different output formats by checkpoint."""
result = run_command(entrypoint_argv, {"data": "test"})
output = json.loads(result.stdout)
if checkpoint_name == "checkpoint_1":
assert output == {"result": "TEST"}
else:
# checkpoint_2+ adds metadata
assert output["result"] == "TEST"
assert "metadata" in outputWhen fields are added in later checkpoints, strip them for comparison:
@pytest.fixture(scope="session")
def strip_regression_fields():
"""Remove fields added in later checkpoints."""
def _strip(data):
data = data.copy()
# Fields added in checkpoint_2+
for field in ["version", "timestamp", "checkpoint"]:
data.pop(field, None)
return data
return _strip
def test_core_output(entrypoint_argv, strip_regression_fields, checkpoint_name):
"""Core output matches expected (ignoring new fields)."""
result = run_command(entrypoint_argv, {"data": "test"})
output = json.loads(result.stdout)
# For regression tests, strip new fields
if checkpoint_name != "checkpoint_1":
output = strip_regression_fields(output)
expected = {"result": "TEST", "data": "test"}
assert output == expectedLoad cases from prior checkpoints:
def load_all_prior_cases(current_checkpoint):
"""Load cases from all prior checkpoints."""
all_cases = []
checkpoint_num = int(current_checkpoint.split("_")[1])
for i in range(1, checkpoint_num):
prior = f"checkpoint_{i}"
cases = load_cases(DATA_DIR / prior / "core")
for case in cases:
case["source_checkpoint"] = prior
all_cases.extend(cases)
return all_cases
# Only run regression cases when not on checkpoint_1
@pytest.fixture(scope="module")
def regression_cases(checkpoint_name):
if checkpoint_name == "checkpoint_1":
return []
return load_all_prior_cases(checkpoint_name)
def test_regression_cases(entrypoint_argv, regression_cases, checkpoint_name):
"""Run all prior checkpoint cases."""
if not regression_cases:
pytest.skip("No regression cases for checkpoint_1")
for case in regression_cases:
result = run_case(entrypoint_argv, case["case"])
# May need to adjust expectations
assert result.returncode == 0def test_api_v1_compatibility(entrypoint_argv, checkpoint_name):
"""V1 API format still works in later checkpoints."""
if checkpoint_name == "checkpoint_1":
pytest.skip("V1 is the current version")
# Old V1 format
v1_input = {"type": "v1", "data": "test"}
result = run_command(entrypoint_argv, v1_input)
assert result.returncode == 0@pytest.mark.functionality
def test_deprecated_option(entrypoint_argv, checkpoint_name):
"""Deprecated option still works with warning."""
if checkpoint_name == "checkpoint_1":
pytest.skip("Option not deprecated yet")
result = subprocess.run(
entrypoint_argv + ["--old-option"], # Deprecated
capture_output=True,
text=True,
)
assert result.returncode == 0
assert "deprecated" in result.stderr.lower()tests/
├── conftest.py
├── test_checkpoint_1.py # Core tests
├── test_checkpoint_2.py # Core tests
├── test_checkpoint_3.py # Core tests
└── test_regression.py # Explicit regression tests
# test_regression.py
"""Explicit regression tests for backward compatibility."""
import pytest
@pytest.mark.regression
def test_v1_input_format(entrypoint_argv):
"""Old V1 input format still works."""
...
@pytest.mark.regression
def test_legacy_output_option(entrypoint_argv):
"""--legacy-output option still works."""
...# test_checkpoint_2.py
"""Tests for checkpoint_2."""
# Core tests for checkpoint_2
def test_new_feature(entrypoint_argv):
...
# Regression tests (checking checkpoint_1 still works)
@pytest.mark.regression
def test_checkpoint_1_basic_still_works(entrypoint_argv):
"""Regression: Basic parsing from checkpoint_1."""
...@pytest.mark.regression
def test_json_parsing(entrypoint_argv):
"""Regression: JSON parsing from checkpoint_1.
In checkpoint_1, we introduced basic JSON parsing.
This must continue to work in all future checkpoints.
"""
...Focus regression tests on:
- Core functionality that must not break
- Public API contracts
- Common user workflows
@pytest.mark.regression
def test_regression_suite(entrypoint_argv):
"""Quick regression check."""
# Test multiple scenarios in one test if they're simple
for case in CRITICAL_CASES:
result = run_case(entrypoint_argv, case)
assert result.returncode == 0# tests/data/regression/basic_case/expected.yaml
# LOCKED: Do not modify - regression baseline
checkpoint_1:
result: value
checkpoint_2:
result: value
version: 2 # New field- Markers - Test categorization
- Config Schema - include_prior_tests setting
- Examples - Real problem examples