diff --git a/.github/workflows/tck.yml b/.github/workflows/tck.yml new file mode 100644 index 00000000..aafce02b --- /dev/null +++ b/.github/workflows/tck.yml @@ -0,0 +1,584 @@ +name: Prompty TCK (Test Compatibility Kit) + +on: + push: + branches: [ main, develop ] + paths: + - 'runtime/**' + - 'tck/**' + - '.github/workflows/tck.yml' + pull_request: + branches: [ main, develop ] + paths: + - 'runtime/**' + - 'tck/**' + - '.github/workflows/tck.yml' + schedule: + # Run TCK daily at 2 AM UTC to catch compatibility regressions + - cron: '0 2 * * *' + workflow_dispatch: + inputs: + runtime: + description: 'Runtime to test (all, python, csharp)' + required: false + default: 'all' + type: choice + options: + - all + - python + - csharp + generate_report: + description: 'Generate detailed compatibility report' + required: false + default: true + type: boolean + +env: + PYTHON_VERSION: '3.11' + DOTNET_VERSION: '9.0' + +jobs: + tck-matrix: + name: TCK Tests (${{ matrix.runtime }} on ${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + runtime: [python, csharp] + exclude: + # Skip some combinations to reduce CI time while maintaining coverage + - os: macos-latest + runtime: csharp + include: + # Add specific configurations if needed + - os: ubuntu-latest + runtime: python + python_version: '3.9' + - os: ubuntu-latest + runtime: python + python_version: '3.12' + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Python + if: matrix.runtime == 'python' || matrix.runtime == 'all' + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python_version || env.PYTHON_VERSION }} + cache: 'pip' + + - name: Setup .NET + if: matrix.runtime == 'csharp' || matrix.runtime == 'all' + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Install Python dependencies + if: matrix.runtime == 'python' || matrix.runtime == 'all' + run: | + python -m pip install --upgrade pip + cd runtime/prompty + pip install -e . 
+ if [ -f "requirements-dev.txt" ]; then + pip install -r requirements-dev.txt + fi + + - name: Build C# Runtime + if: matrix.runtime == 'csharp' || matrix.runtime == 'all' + run: | + cd runtime/promptycs + dotnet restore + dotnet build --configuration Release --no-restore + + - name: Build C# TCK + if: matrix.runtime == 'csharp' || matrix.runtime == 'all' + run: | + cd tck/csharp + dotnet build --configuration Release + + - name: Make TCK scripts executable (Unix) + if: runner.os != 'Windows' + run: | + cd tck + chmod +x run-tck.sh + chmod +x python/run-tck.sh + chmod +x csharp/run-tck.sh + + - name: Run TCK (Unix) + if: runner.os != 'Windows' + run: | + cd tck + ./run-tck.sh --runtime ${{ matrix.runtime }} + + - name: Run TCK (Windows) + if: runner.os == 'Windows' + run: | + cd tck + pwsh -File run-tck.ps1 -Runtime ${{ matrix.runtime }} + + - name: Upload TCK Results + uses: actions/upload-artifact@v4 + if: always() + with: + name: tck-results-${{ matrix.runtime }}-${{ matrix.os }} + path: | + tck/results/${{ matrix.runtime }}-results.json + tck/reports/ + retention-days: 30 + + - name: Upload TCK Logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: tck-logs-${{ matrix.runtime }}-${{ matrix.os }} + path: | + tck/logs/ + retention-days: 7 + + - name: Display TCK Summary + if: always() + run: | + echo "## TCK Results Summary for ${{ matrix.runtime }} on ${{ matrix.os }}" >> $GITHUB_STEP_SUMMARY + if [ -f "tck/results/${{ matrix.runtime }}-results.json" ]; then + python -c " + import json + import sys + try: + with open('tck/results/${{ matrix.runtime }}-results.json', 'r') as f: + results = json.load(f) + total = len(results) + passed = len([r for r in results if r.get('result') == 'pass']) + failed = len([r for r in results if r.get('result') == 'fail']) + errors = len([r for r in results if r.get('result') == 'error']) + skipped = len([r for r in results if r.get('result') == 'skip']) + + print(f'- **Total Tests**: {total}') + print(f'- **Passed**: {passed} โœ…') + print(f'- **Failed**: {failed} โŒ') + print(f'- **Errors**: {errors} ๐Ÿšจ') + print(f'- **Skipped**: {skipped} โญ๏ธ') + + if failed > 0 or errors > 0: + print() + print('### Failed/Error Tests:') + for result in results: + if result.get('result') in ['fail', 'error']: + test_id = result.get('test_id', 'unknown') + error_msg = result.get('error_message', 'No details') + print(f'- **{test_id}**: {error_msg}') + except Exception as e: + print(f'Error reading results: {e}') + " >> $GITHUB_STEP_SUMMARY + else + echo "โŒ No results file found" >> $GITHUB_STEP_SUMMARY + fi + shell: bash + + compatibility-report: + name: Generate Compatibility Report + runs-on: ubuntu-latest + needs: tck-matrix + if: always() && (github.event.inputs.generate_report != 'false') + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install comparison tools dependencies + run: | + cd tck + if [ -f "tools/requirements.txt" ]; then + pip install -r tools/requirements.txt + else + pip install json-diff + fi + + - name: Download all TCK results + uses: actions/download-artifact@v4 + with: + pattern: tck-results-* + path: tck/downloaded-results/ + merge-multiple: true + + - name: Organize results + run: | + cd tck + mkdir -p results + find downloaded-results -name "*-results.json" -exec cp {} results/ \; + ls -la results/ + + - name: Generate compatibility report + run: | + cd tck + if [ -f 
results/python-results.json ] && [ -f results/csharp-results.json ]; then + python tools/compare_runtimes.py \ + results/python-results.json \ + results/csharp-results.json \ + --output reports/compatibility-report.md + + python tools/compare_runtimes.py \ + results/python-results.json \ + results/csharp-results.json \ + --format json \ + --output reports/compatibility-report.json + else + echo "Missing result files for compatibility comparison" + echo "Available files:" + ls -la results/ + + # Create a minimal report if files are missing + mkdir -p reports + echo "# TCK Compatibility Report" > reports/compatibility-report.md + echo "" >> reports/compatibility-report.md + echo "โš ๏ธ **Warning**: Could not generate full compatibility report due to missing result files." >> reports/compatibility-report.md + echo "" >> reports/compatibility-report.md + echo "Available results:" >> reports/compatibility-report.md + ls results/ | sed 's/^/- /' >> reports/compatibility-report.md + fi + + - name: Check compatibility threshold + id: compatibility_check + run: | + cd tck + if [ -f reports/compatibility-report.json ]; then + COMPATIBILITY_RATE=$(python -c " + import json + import sys + try: + with open('reports/compatibility-report.json', 'r') as f: + report = json.load(f) + rate = report.get('overall_compatibility_rate', 0) * 100 + print(f'{rate:.1f}') + + # Set threshold - can be configured + threshold = 80.0 + if rate < threshold: + print(f'COMPATIBILITY_WARNING=true', file=sys.stderr) + sys.exit(1) + else: + print(f'COMPATIBILITY_WARNING=false', file=sys.stderr) + sys.exit(0) + except Exception as e: + print(f'Error: {e}', file=sys.stderr) + print(f'COMPATIBILITY_WARNING=true', file=sys.stderr) + sys.exit(1) + ") + echo "rate=$COMPATIBILITY_RATE" >> $GITHUB_OUTPUT + else + echo "No compatibility report generated" + echo "rate=0" >> $GITHUB_OUTPUT + fi + continue-on-error: true + + - name: Add compatibility report to summary + run: | + echo "## ๐Ÿ”„ Cross-Runtime Compatibility Report" >> $GITHUB_STEP_SUMMARY + if [ -f "tck/reports/compatibility-report.md" ]; then + cat tck/reports/compatibility-report.md >> $GITHUB_STEP_SUMMARY + else + echo "โŒ Failed to generate compatibility report" >> $GITHUB_STEP_SUMMARY + fi + + - name: Upload compatibility report + uses: actions/upload-artifact@v4 + if: always() + with: + name: tck-compatibility-report + path: | + tck/reports/compatibility-report.md + tck/reports/compatibility-report.json + retention-days: 90 + + - name: Comment PR with compatibility report + if: github.event_name == 'pull_request' && always() + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const path = require('path'); + + try { + const reportPath = 'tck/reports/compatibility-report.md'; + if (fs.existsSync(reportPath)) { + const report = fs.readFileSync(reportPath, 'utf8'); + + const body = `## ๐Ÿ”„ Prompty TCK Compatibility Report + + ${report} + + --- + ๐Ÿ“Š *This report was automatically generated by the Prompty TCK workflow*`; + + await github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }); + } else { + await github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: '## ๐Ÿงช Prompty TCK\n\nโŒ TCK compatibility report could not be generated. Check the logs for details.' 
+ }); + } + } catch (error) { + console.log('Failed to post comment:', error); + } + + - name: Create issue for compatibility regression + if: steps.compatibility_check.outcome == 'failure' && github.ref == 'refs/heads/main' + uses: actions/github-script@v7 + with: + script: | + const compatibilityRate = '${{ steps.compatibility_check.outputs.rate }}'; + + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: `๐Ÿšจ TCK Compatibility Regression Detected (${compatibilityRate}%)`, + body: `## Compatibility Issue Detected + + The Prompty TCK has detected a compatibility regression between runtime implementations. + + **Current Compatibility Rate**: ${compatibilityRate}% + **Required Threshold**: 80% + + ### Action Required + + 1. Review the [compatibility report](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) + 2. Identify which tests are failing across runtimes + 3. Fix compatibility issues in the affected runtimes + 4. Re-run the TCK to verify fixes + + ### Related + + - Commit: ${{ github.sha }} + - Workflow: [TCK Run #${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) + + This issue was automatically created by the Prompty TCK workflow.`, + labels: ['bug', 'tck', 'compatibility', 'priority-high'] + }); + + runtime-specific-tests: + name: Runtime-Specific Validation + runs-on: ubuntu-latest + needs: tck-matrix + if: always() + + strategy: + matrix: + runtime: [python, csharp] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Setup .NET + if: matrix.runtime == 'csharp' + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Download TCK results + uses: actions/download-artifact@v4 + with: + name: tck-results-${{ matrix.runtime }}-ubuntu-latest + path: tck/results/ + + - name: Validate runtime-specific requirements + run: | + cd tck + echo "## Runtime-Specific Validation: ${{ matrix.runtime }}" >> $GITHUB_STEP_SUMMARY + + if [ ! -f "results/${{ matrix.runtime }}-results.json" ]; then + echo "โŒ Results file not found" >> $GITHUB_STEP_SUMMARY + exit 1 + fi + + # Validate JSON format + python -c " + import json + import sys + + try: + with open('results/${{ matrix.runtime }}-results.json', 'r') as f: + results = json.load(f) + + print('โœ… Valid JSON format') + + # Check required fields + required_fields = ['test_id', 'result', 'runtime', 'execution_time_ms'] + missing_fields = [] + + for i, result in enumerate(results): + for field in required_fields: + if field not in result: + missing_fields.append(f'Result {i}: missing {field}') + + if missing_fields: + print('โŒ Missing required fields:') + for missing in missing_fields[:5]: # Show first 5 + print(f' - {missing}') + if len(missing_fields) > 5: + print(f' - ... 
and {len(missing_fields) - 5} more') + sys.exit(1) + else: + print('โœ… All required fields present') + + # Check runtime consistency + runtimes = set(r.get('runtime') for r in results) + if len(runtimes) != 1 or '${{ matrix.runtime }}' not in runtimes: + print(f'โŒ Runtime inconsistency: {runtimes}') + sys.exit(1) + else: + print(f'โœ… Runtime consistently reported as ${{ matrix.runtime }}') + + except json.JSONDecodeError as e: + print(f'โŒ Invalid JSON: {e}') + sys.exit(1) + except Exception as e: + print(f'โŒ Validation error: {e}') + sys.exit(1) + " >> $GITHUB_STEP_SUMMARY + + publish-results: + name: Publish TCK Results + runs-on: ubuntu-latest + needs: [tck-matrix, compatibility-report] + if: github.ref == 'refs/heads/main' && always() + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Download compatibility report + uses: actions/download-artifact@v4 + with: + name: tck-compatibility-report + path: tck-reports + + - name: Deploy to GitHub Pages + if: github.repository_owner == 'microsoft' # Adjust to your org + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: tck-reports + destination_dir: tck + keep_files: true + + - name: Create GitHub Release on schedule + if: github.event_name == 'schedule' + uses: softprops/action-gh-release@v1 + with: + tag_name: tck-${{ github.run_number }} + name: TCK Results ${{ github.run_number }} + body: | + Automated TCK compatibility report + + Generated on: ${{ github.event.head_commit.timestamp }} + Commit: ${{ github.sha }} + files: tck-reports/* + draft: false + prerelease: false + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + notification: + name: Send Notifications + runs-on: ubuntu-latest + needs: [tck-matrix, compatibility-report] + if: always() + + steps: + - name: Determine overall status + id: status + run: | + TCK_STATUS="${{ needs.tck-matrix.result }}" + COMPAT_STATUS="${{ needs.compatibility-report.result }}" + + if [ "$TCK_STATUS" = "success" ] && [ "$COMPAT_STATUS" = "success" ]; then + echo "status=success" >> $GITHUB_OUTPUT + echo "message=โœ… All TCK tests passed with good compatibility" >> $GITHUB_OUTPUT + elif [ "$TCK_STATUS" = "success" ]; then + echo "status=warning" >> $GITHUB_OUTPUT + echo "message=โš ๏ธ TCK tests passed but compatibility issues detected" >> $GITHUB_OUTPUT + else + echo "status=failure" >> $GITHUB_OUTPUT + echo "message=โŒ TCK tests failed" >> $GITHUB_OUTPUT + fi + + - name: Create status summary + run: | + echo "## ๐Ÿ“‹ Prompty TCK Workflow Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Status**: ${{ steps.status.outputs.message }}" >> $GITHUB_STEP_SUMMARY + echo "**Commit**: ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY + echo "**Workflow**: [TCK Run #${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Next Steps" >> $GITHUB_STEP_SUMMARY + + if [ "${{ steps.status.outputs.status }}" = "failure" ]; then + echo "1. ๐Ÿ” Review failed test results in the artifacts" >> $GITHUB_STEP_SUMMARY + echo "2. ๐Ÿ”ง Fix failing tests in the affected runtimes" >> $GITHUB_STEP_SUMMARY + echo "3. ๐Ÿงช Re-run TCK locally to verify fixes" >> $GITHUB_STEP_SUMMARY + echo "4. ๐Ÿ“ค Push fixes and re-run workflow" >> $GITHUB_STEP_SUMMARY + elif [ "${{ steps.status.outputs.status }}" = "warning" ]; then + echo "1. 
๐Ÿ“Š Review compatibility report for details" >> $GITHUB_STEP_SUMMARY + echo "2. ๐Ÿ”„ Harmonize runtime implementations" >> $GITHUB_STEP_SUMMARY + echo "3. ๐Ÿ“ˆ Aim for >90% compatibility rate" >> $GITHUB_STEP_SUMMARY + else + echo "1. ๐ŸŽ‰ All tests passing - great work!" >> $GITHUB_STEP_SUMMARY + echo "2. ๐Ÿ“ˆ Monitor compatibility in future changes" >> $GITHUB_STEP_SUMMARY + echo "3. ๐Ÿ”„ Consider adding more test coverage" >> $GITHUB_STEP_SUMMARY + fi + + - name: Send Slack notification on failure + if: steps.status.outputs.status == 'failure' && (github.ref == 'refs/heads/main' || github.event_name == 'schedule') && env.SLACK_WEBHOOK_URL != '' + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + run: | + curl -X POST -H 'Content-type: application/json' \ + --data '{"text":"๐Ÿšจ Prompty TCK Failed\n\nRepository: ${{ github.repository }}\nBranch: ${{ github.ref }}\nCommit: ${{ github.sha }}\nWorkflow: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \ + $SLACK_WEBHOOK_URL + + - name: Send Teams notification on failure + if: steps.status.outputs.status == 'failure' && (github.ref == 'refs/heads/main' || github.event_name == 'schedule') && env.TEAMS_WEBHOOK_URL != '' + env: + TEAMS_WEBHOOK_URL: ${{ secrets.TEAMS_WEBHOOK_URL }} + run: | + curl -X POST -H 'Content-type: application/json' \ + --data '{ + "@type": "MessageCard", + "@context": "http://schema.org/extensions", + "summary": "Prompty TCK Failed", + "themeColor": "ff0000", + "sections": [{ + "activityTitle": "๐Ÿšจ Prompty TCK Failed", + "facts": [ + {"name": "Repository", "value": "${{ github.repository }}"}, + {"name": "Branch", "value": "${{ github.ref }}"}, + {"name": "Commit", "value": "${{ github.sha }}"} + ], + "potentialAction": [{ + "@type": "OpenUri", + "name": "View Workflow", + "targets": [{"os": "default", "uri": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}] + }] + }] + }' \ + $TEAMS_WEBHOOK_URL diff --git a/.gitignore b/.gitignore index 8727016f..562c1073 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,6 @@ runtime/promptycs/Prompty.Core/bin/ runtime/promptycs/Prompty.Core/obj/ runtime/promptycs/Tests/bin/ runtime/promptycs/Tests/obj/ -.env \ No newline at end of file +.env +tck/csharp/obj +tck/csharp/bin diff --git a/.vscode/settings.json b/.vscode/settings.json index 0c9ae8aa..1a254317 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -6,5 +6,6 @@ "files.associations": { "*.css": "tailwindcss", "*.mdx": "markdown" - } + }, + "java.compile.nullAnalysis.mode": "disabled" } diff --git a/tck/README.md b/tck/README.md new file mode 100644 index 00000000..faa4c3ba --- /dev/null +++ b/tck/README.md @@ -0,0 +1,634 @@ +# Prompty Test Compatibility Kit (TCK) + +The Prompty TCK ensures that all runtime implementations follow the same specification and produce compatible results. This comprehensive testing framework validates that Python, C#, and future runtime implementations (Java, JavaScript) are fully compatible and respect the [Prompty specification](../Prompty.yaml). + +## Overview + +The TCK validates compatibility across multiple dimensions: + +1. **Specification Compliance** - All runtimes parse the same `.prompty` files identically +2. **Functional Equivalence** - Same inputs produce equivalent outputs across runtimes +3. **Error Handling** - Consistent error behavior for invalid inputs +4. **Template Rendering** - Identical template processing results +5. 
**Model Integration** - Consistent model configuration and execution +6. **Cross-Runtime Validation** - Direct comparison between runtime outputs + +## Architecture + +The TCK consists of several key components: + +1. **Shared Test Data** (`testdata/`) - Common `.prompty` files and test cases +2. **Expected Results** (`expected/`) - Reference outputs for comparison +3. **Runtime Interfaces** (`interface/`) - Optional common interface for standardization +4. **Runtime Implementations** - Language-specific TCK implementations +5. **Comparison Tools** (`tools/`) - Cross-runtime result analysis +6. **Test Runner** (`run-tck.sh`) - Main orchestration script + +## Test Categories + +### Specification Tests +Verify that all runtimes parse `.prompty` files identically: +- YAML frontmatter parsing +- Metadata extraction (name, description, authors, etc.) +- Model configuration parsing +- Input/output specifications +- Sample data extraction + +### Functional Tests +Verify that runtimes produce equivalent outputs: +- Template rendering with Jinja2 +- Variable substitution +- Environment variable resolution +- Complex template features (loops, conditionals) +- Function calling configuration + +### Error Handling Tests +Verify consistent error behavior: +- Invalid YAML handling +- Missing required inputs +- Type validation errors +- Template syntax errors + +### Integration Tests +End-to-end compatibility verification: +- Cross-runtime output comparison +- Performance benchmarking +- Configuration override behavior + +## Running the TCK + +### Prerequisites + +- Python 3.11+ (for Python runtime and comparison tools) +- .NET 9+ SDK (for C# runtime) +- Java 21+ JDK (for Java runtime, when available) +- Node.js 19+ (for JavaScript runtime, when available) + +### Basic Usage + +```bash +# Run TCK for all available runtimes +./run-tck.sh + +# Run TCK for specific runtime only +./run-tck.sh --runtime python +./run-tck.sh --runtime csharp + +# Run with performance monitoring +./run-tck.sh --performance + +# Run in CI mode with optimizations +./run-tck.sh --ci + +# Run quick tests only +./run-tck.sh --quick + +# Enable debug mode +./run-tck.sh --debug +``` + +### Advanced Usage + +```bash +# Compare specific runtimes +python tck/tools/compare_runtimes.py \ + results/python-results.json \ + results/csharp-results.json \ + --output reports/py-cs-comparison.md + +# Generate JSON report for CI/CD integration +python tck/tools/compare_runtimes.py \ + results/*.json \ + --format json \ + --output reports/tck-results.json + +# Check compatibility threshold +python tools/check_compatibility_threshold.py results/compatibility-report.json +``` + +### Windows Support + +```powershell +# PowerShell runner for Windows +.\run-tck.ps1 -Runtime python +.\run-tck.ps1 -Runtime csharp +.\run-tck.ps1 -Runtime all +.\run-tck.ps1 -Quick +``` + +### Validation and Setup + +```bash +# Validate setup before committing +./pre-commit-check.sh + +# Validate TCK configuration +python validate-setup.py +``` + +## Implementing a New Runtime + +To add TCK support for a new runtime: + +1. **Create Runtime Directory** + ```bash + mkdir tck/newruntime + ``` + +2. **Implement TCK Logic** + + Create functions that implement the core TCK functionality: + - `parse_prompty()` - Parse .prompty content into structured format + - `render_template()` - Render template with input data + - `validate_inputs()` - Validate inputs against specification + - `get_sample_data()` - Extract sample data from prompty + +3. 
**Create Test Runner** + + Implement a test runner that: + - Loads test specifications from `tck-tests.json` + - Executes tests using your runtime implementation + - Outputs results in the standard JSON format + +4. **Add to Main Runner** + + Update `run-tck.sh` to include your new runtime. + +### Example Implementation Structure + +``` +tck/newruntime/ +โ”œโ”€โ”€ newruntime_tck.py # Main TCK implementation +โ”œโ”€โ”€ requirements.txt # Dependencies (if needed) +โ”œโ”€โ”€ README.md # Runtime-specific setup instructions +โ””โ”€โ”€ test_runner.py # Test execution script +``` + +## Result Format + +All runtime implementations must output results in this JSON format: + +```json +[ + { + "test_id": "basic-parsing", + "result": "pass|fail|skip|error", + "runtime": "python", + "execution_time_ms": 123.45, + "output": { /* test-specific output */ }, + "error_message": "Error details (if result=error)", + "error_type": "ExceptionType (if result=error)" + } +] +``` + +## Adding New Tests + +1. **Create Test Data** + - Add new `.prompty` file to `testdata/` + - Create expected results in `expected/` if needed + +2. **Update Test Specification** + - Add test case to `tck-tests.json` + - Specify test category and expected behavior + +3. **Test Across Runtimes** + - Run TCK to verify all runtimes handle the new test + - Update runtime implementations if needed + +### Test Specification Format + +```json +{ + "id": "unique-test-id", + "name": "Human readable test name", + "description": "Test description", + "category": "specification|functional|integration|error-handling", + "prompty_file": "testdata/test.prompty", + "input_data": { /* optional input data */ }, + "environment_vars": { /* optional env vars */ }, + "expected_errors": [ /* for error tests */ ], + "skip_runtimes": [ /* runtimes to skip */ ] +} +``` + +## Continuous Integration + +### GitHub Actions CI/CD Integration + +The Prompty TCK includes a comprehensive GitHub Actions workflow that automatically runs compatibility tests across multiple platforms and runtime combinations. + +#### Workflow Overview + +The TCK workflow (`.github/workflows/tck.yml`) provides: + +- **Multi-platform testing**: Ubuntu, Windows, and macOS +- **Cross-runtime compatibility**: Python and C# runtimes +- **Automated reporting**: Compatibility reports and PR comments +- **Artifact management**: Test results and detailed logs +- **Notification system**: Slack/Teams integration for failures +- **Threshold monitoring**: Automatic issue creation for regressions + +#### Workflow Triggers + +The workflow runs automatically on: + +```yaml +# Push to main branches +- push: + branches: [ main, develop ] + paths: [ 'runtime/**', 'tck/**' ] + +# Pull requests +- pull_request: + branches: [ main, develop ] + paths: [ 'runtime/**', 'tck/**' ] + +# Daily scheduled runs at 2 AM UTC +- schedule: + - cron: '0 2 * * *' + +# Manual workflow dispatch +- workflow_dispatch: + inputs: + runtime: # python, csharp, all + generate_report: # true/false +``` + +#### Workflow Jobs + +1. **`tck-matrix`** - Core TCK Testing + - Runs TCK across matrix of OS and runtime combinations + - Builds and tests each runtime implementation + - Uploads test results and logs as artifacts + - Generates test summaries in GitHub Actions UI + +2. 
**`compatibility-report`** - Cross-Runtime Analysis + - Downloads results from all matrix runs + - Generates markdown and JSON compatibility reports + - Checks compatibility threshold (default: 80%) + - Posts results as PR comments + - Creates issues for compatibility regressions + +3. **`runtime-specific-tests`** - Validation + - Validates JSON format compliance + - Checks required field presence + - Verifies runtime consistency + - Ensures output format standards + +4. **`publish-results`** - Result Publishing + - Deploys reports to GitHub Pages (optional) + - Creates GitHub releases for scheduled runs + - Archives results for historical tracking + +5. **`notification`** - Status Reporting + - Determines overall workflow status + - Sends notifications for failures (Slack/Teams) + - Creates workflow summaries + - Provides actionable next steps + +#### Setting Up the Workflow + +**Prerequisites**: The workflow requires specific repository structure: +- `/runtime/prompty/` - Python runtime implementation +- `/runtime/promptycs/` - C# runtime implementation +- `/tck/` - TCK test suite and runners + +**Optional Configuration**: Set these repository secrets for enhanced features: + +```bash +# Notification webhooks (optional) +SLACK_WEBHOOK_URL=https://hooks.slack.com/... +TEAMS_WEBHOOK_URL=https://outlook.office.com/webhook/... + +# GitHub token is automatically provided +GITHUB_TOKEN= +``` + +#### Example Workflow Output + +```markdown +## ๐Ÿ”„ Prompty TCK Compatibility Report + +**Overall Compatibility Rate: 85.7%** + +### Summary +- Total tests: 14 +- Compatible tests: 12 +- Incompatible tests: 2 + +### Runtime Matrix Results +โœ… Python on Ubuntu: 14/14 tests passed +โœ… C# on Ubuntu: 14/14 tests passed +โœ… Python on Windows: 14/14 tests passed +โš ๏ธ C# on Windows: 12/14 tests passed + +### Incompatible Tests +- `template-escaping`: Output format differences +- `unicode-handling`: Character encoding variations + +--- +๐Ÿ“Š *Generated by Prompty TCK Workflow* +``` + +### Manual CI Integration + +The TCK can be integrated into other CI/CD pipelines: + +1. **Run TCK in CI** + ```yaml + - name: Run Prompty TCK + run: | + cd tck + ./run-tck.sh --runtime python --runtime csharp + ``` + +2. **Check Compatibility** + ```yaml + - name: Check Runtime Compatibility + run: | + cd tck + python tools/compare_runtimes.py results/*.json --format json + ``` + +3. **Publish Results** + - Archive test results as CI artifacts + - Generate compatibility reports + - Set up notifications for compatibility regressions + +## Ensuring Cross-Runtime Compatibility + +### Standard Output Format + +All runtime implementations **MUST** produce output in the exact same JSON structure to ensure compatibility. 
The expected format is: + +```json +{ + "metadata": { + "name": "Prompty Name", + "description": "Description", + "version": "1.0", + "authors": ["author1", "author2"], + "tags": ["tag1", "tag2"] + }, + "model": { + "api": "chat", + "configuration": { + "type": "openai", + "model": "gpt-3.5-turbo" + }, + "parameters": { + "max_tokens": 100, + "temperature": 0.0 + }, + "response": "first" + }, + "inputs": { + "field_name": { + "type": "string|number|boolean|array|object", + "description": "Field description", + "required": true, + "default": "default_value" + } + }, + "outputs": { + "field_name": { + "type": "string", + "description": "Output description" + } + }, + "sample": { + "field_name": "sample_value" + }, + "template": { + "format": "jinja2", + "parser": "prompty" + }, + "content": "Template content with variables" +} +``` + +### Critical Compatibility Requirements + +1. **Data Type Consistency** + - Numbers MUST be serialized as JSON numbers, not strings + - Booleans MUST be `true`/`false`, not `"true"`/`"false"` + - Arrays MUST be JSON arrays `[]`, not serialized strings + - Objects MUST be JSON objects `{}`, not serialized strings + +2. **Field Name Standardization** + - Use exact field names from the specification + - Do not add runtime-specific prefixes or suffixes + - Include all required fields even if empty (use `{}` or `[]`) + +3. **Template Format Reporting** + - Parse template format from YAML frontmatter first + - Report the actual format used (usually "jinja2") + - Do not report runtime-specific template engine names + +4. **Error Handling Consistency** + ```json + { + "test_id": "test-name", + "result": "error", + "runtime": "your-runtime", + "execution_time_ms": 123.45, + "error_message": "Human readable error message", + "error_type": "StandardErrorType" + } + ``` + +### Implementation Checklist for New Runtimes + +Before submitting a new runtime implementation, verify: + +- [ ] All tests in `tck-tests.json` execute (pass, fail, or error - no crashes) +- [ ] Output format exactly matches expected JSON structure +- [ ] Numbers are JSON numbers, not strings +- [ ] Required fields are always present (even if empty) +- [ ] Template format matches what's in the `.prompty` file +- [ ] Error messages follow standard patterns +- [ ] Compatibility rate >90% with existing runtimes +- [ ] Performance within 2x of reference implementations + +### Testing Your Implementation + +1. **Run TCK for your runtime only**: + ```bash + ./run-tck.sh --runtime yourruntime + ``` + +2. **Compare with reference implementation**: + ```bash + python tools/compare_runtimes.py \ + results/python-results.json \ + results/yourruntime-results.json \ + --format json + ``` + +3. **Analyze specific differences**: + ```bash + python tools/compare_runtimes.py \ + results/python-results.json \ + results/yourruntime-results.json \ + --detailed --test basic-parsing + ``` + +4. 
**Check compatibility rate**: + ```bash + python tools/check_compatibility_threshold.py results/compatibility-report.json + ``` + +### Output Normalization Guidelines + +When converting from your runtime's native format to TCK format: + +```pseudo +// Example normalization +function normalizeForTCK(runtimeOutput) { + return { + metadata: extractMetadata(runtimeOutput), + model: normalizeModel(runtimeOutput.model), + inputs: normalizeInputs(runtimeOutput.inputs), + outputs: normalizeOutputs(runtimeOutput.outputs), + sample: normalizeSample(runtimeOutput.sample), + template: { + format: runtimeOutput.template?.format || "jinja2", + parser: runtimeOutput.template?.parser || "prompty" + }, + content: runtimeOutput.content + } +} + +function normalizeModel(model) { + return { + api: model.api || "chat", + configuration: model.configuration || {}, + parameters: ensureNumericTypes(model.parameters || {}), + response: model.response || "first" + } +} +``` + +## Monitoring and Maintenance + +### Regular Maintenance Tasks + +1. **Review Compatibility Trends** + - Monitor daily compatibility reports + - Track regression patterns + - Update thresholds as needed + +2. **Update Runtime Matrix** + - Add new runtime implementations + - Update OS versions periodically + - Adjust exclusions based on support + +3. **Maintain Test Coverage** + - Add tests for new features + - Update expected results + - Expand error handling scenarios + +### Troubleshooting Common Issues + +**Build Failures:** +```bash +# Check .NET versions +dotnet --list-runtimes + +# Verify Python dependencies +pip list + +# Review build logs in GitHub Actions +``` + +**Compatibility Regressions:** +```bash +# Run TCK locally +./run-tck.sh + +# Compare specific results +python tools/compare_runtimes.py results/python-results.json results/csharp-results.json + +# Analyze specific test differences +python tools/compare_runtimes.py --detailed --test basic-parsing +``` + +**Workflow Permissions:** +- Ensure repository has Actions enabled +- Verify GITHUB_TOKEN permissions for PR comments +- Check organization settings for workflow restrictions + +### Best Practices + +1. **Test Locally First** + ```bash + # Always run TCK locally before pushing + cd tck && ./run-tck.sh + ``` + +2. **Monitor Compatibility** + - Set up notifications for compatibility drops + - Review weekly compatibility trends + - Address issues promptly + +3. **Documentation Updates** + - Update compatibility requirements in README + - Document known compatibility issues + - Maintain implementation guides + +4. 
**Performance Optimization**
+   - Use matrix exclusions to reduce CI time
+   - Cache dependencies where possible
+   - Optimize test execution order
+
+## Environment Variables
+
+The TCK supports several environment variables for configuration:
+
+- `TCK_DEBUG` - Enable debug mode (true/false)
+- `TCK_PERFORMANCE_MODE` - Enable performance monitoring (true/false)
+- `TCK_OUTPUT_FORMAT` - Default output format (json/xml/junit)
+- `TCK_TIMEOUT` - Test timeout in seconds (default: 300)
+- `TCK_CI_MODE` - Enable CI mode optimizations (true/false)
+
+## File Structure
+
+```
+tck/
+├── run-tck.sh                  # Main test runner (Unix/Linux/macOS)
+├── run-tck.ps1                 # PowerShell runner (Windows)
+├── validate-setup.py           # Setup validation script
+├── pre-commit-check.sh         # Pre-commit validation
+├── tck-tests.json              # Test specifications
+├── tck-schema.json             # Result format schema
+├── python/                     # Python TCK implementation
+│   ├── run-tck.sh
+│   └── python_tck.py
+├── csharp/                     # C# TCK implementation
+│   ├── run-tck.sh
+│   ├── CSharpTCK.cs
+│   └── CSharpTCK.csproj
+├── interface/                  # Optional shared interfaces
+│   └── tck_interface.py
+├── testdata/                   # Shared test data
+│   ├── basic-parsing.prompty
+│   ├── complex-template.prompty
+│   └── ...
+├── expected/                   # Expected results
+├── results/                    # Generated test results
+├── reports/                    # Compatibility reports
+└── tools/                      # Analysis and comparison tools
+    ├── compare_runtimes.py
+    └── check_compatibility_threshold.py
+```
+
+## Related Documentation
+
+- [`IMPLEMENTATION.md`](IMPLEMENTATION.md) - Detailed implementation guide
+- [`INTERFACE-SIMPLIFICATION.md`](INTERFACE-SIMPLIFICATION.md) - Interface design changes
+- [`TCK_COMPATIBILITY_ANALYSIS.md`](TCK_COMPATIBILITY_ANALYSIS.md) - Compatibility analysis
+- [`WORKFLOW-SUMMARY.md`](WORKFLOW-SUMMARY.md) - GitHub Actions workflow details
+- [`.github/workflows/tck.yml`](../.github/workflows/tck.yml) - CI/CD workflow configuration
+
+The GitHub Actions workflow provides comprehensive automation for maintaining runtime compatibility and catching regressions early in the development cycle.
diff --git a/tck/csharp/CSharpTCK.cs b/tck/csharp/CSharpTCK.cs
new file mode 100644
index 00000000..349cb59f
--- /dev/null
+++ b/tck/csharp/CSharpTCK.cs
@@ -0,0 +1,480 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text.Json;
+using Newtonsoft.Json;
+using Prompty.Core;
+
+namespace Prompty.TCK
+{
+    public class CSharpTCK
+    {
+        private readonly string tckRootPath;
+
+        public CSharpTCK()
+        {
+            // Get the parent directory of the current directory (which is csharp/)
+            // to find the TCK root directory
+            tckRootPath = Directory.GetParent(Directory.GetCurrentDirectory())?.FullName ??
+                Directory.GetCurrentDirectory();
+
+            // Initialize the Prompty Core library
+            InvokerFactory.AutoDiscovery();
+        }
+
+        public static void Main(string[] args)
+        {
+            if (args.Length < 2)
+            {
+                Console.Error.WriteLine("Usage: CSharpTCK <test-file> <output-file>");
+                Environment.Exit(1);
+            }
+
+            string testFile = args[0];
+            string outputFile = args[1];
+
+            var tck = new CSharpTCK();
+            try
+            {
+                tck.RunTests(testFile, outputFile);
+            }
+            catch (Exception e)
+            {
+                Console.Error.WriteLine($"TCK execution failed: {e.Message}");
+                Console.Error.WriteLine(e.StackTrace);
+                Environment.Exit(1);
+            }
+        }
+
+        public void RunTests(string testFile, string outputFile)
+        {
+            Console.WriteLine("C# Prompty TCK Starting...");
+
+            // Read test definitions
+            string testContent = File.ReadAllText(testFile);
+            var testData = JsonConvert.DeserializeObject<Dictionary<string, object>>(testContent);
+
+            if (testData == null || !testData.ContainsKey("tests"))
+            {
+                throw new InvalidOperationException("Invalid test file format");
+            }
+
+            var tests = JsonConvert.DeserializeObject<List<Dictionary<string, object>>>(testData["tests"].ToString());
+            if (tests == null)
+            {
+                throw new InvalidOperationException("No tests found in test file");
+            }
+
+            var results = new List<Dictionary<string, object>>();
+
+            foreach (var test in tests)
+            {
+                string testId = test.GetValueOrDefault("id", "").ToString();
+                Console.WriteLine($"Running test: {testId}");
+
+                var result = RunSingleTest(test);
+                results.Add(result);
+            }
+
+            // Create output metadata using the Prompty.Core library approach
+            var output = new Dictionary<string, object>
+            {
+                ["runtime"] = "csharp",
+                ["timestamp"] = DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ssZ"),
+                ["version"] = "1.0",
+                ["total_tests"] = results.Count,
+                ["results"] = results
+            };
+
+            // Save results
+            Directory.CreateDirectory(Path.GetDirectoryName(outputFile) ?? ".");
+            string json = JsonConvert.SerializeObject(output, Formatting.Indented);
+            File.WriteAllText(outputFile, json);
+
+            Console.WriteLine("C# Prompty TCK Completed");
+        }
+
+        private Dictionary<string, object> RunSingleTest(Dictionary<string, object> test)
+        {
+            var result = new Dictionary<string, object>
+            {
+                ["test_id"] = test.GetValueOrDefault("id", ""),
+                ["test_type"] = DetermineTestType(test),
+                ["runtime"] = "csharp"
+            };
+
+            var startTime = DateTime.UtcNow;
+
+            try
+            {
+                string testType = DetermineTestType(test);
+
+                switch (testType.ToLower())
+                {
+                    case "parse":
+                        result = RunParseTest(test, result);
+                        break;
+                    case "render":
+                        result = RunRenderTest(test, result);
+                        break;
+                    case "execute":
+                        result = RunExecuteTest(test, result);
+                        break;
+                    default:
+                        result["status"] = "skip";
+                        result["message"] = $"Unknown test type: {testType}";
+                        break;
+                }
+            }
+            catch (Exception e)
+            {
+                result["status"] = "error";
+                result["error"] = e.Message;
+                result["error_type"] = e.GetType().Name;
+            }
+
+            var endTime = DateTime.UtcNow;
+            result["execution_time_ms"] = (endTime - startTime).TotalMilliseconds;
+
+            return result;
+        }
+
+        private string DetermineTestType(Dictionary<string, object> test)
+        {
+            // If explicit type is specified, use it
+            if (test.ContainsKey("type") && !string.IsNullOrEmpty(test["type"]?.ToString()))
+            {
+                return test["type"].ToString()!;
+            }
+
+            // Infer test type from other fields
+            if (test.ContainsKey("expected_parsing"))
+            {
+                return "parse";
+            }
+            else if (test.ContainsKey("expected_rendering") || test.ContainsKey("input_data"))
+            {
+                return "render";
+            }
+            else if (test.ContainsKey("expected_execution"))
+            {
+                return "execute";
+            }
+
+            // Default to parse if we can't determine
+            return "parse";
+        }
+
+        private Dictionary<string, object> RunParseTest(Dictionary<string, object> test, Dictionary<string, object> result)
+        {
+            string promptyFile = test.GetValueOrDefault("prompty_file", "").ToString() ?? "";
+            string expectedFile = test.GetValueOrDefault("expected_parsing", test.GetValueOrDefault("expected_file", "")).ToString() ?? "";
+
+            // Resolve paths relative to TCK root
+            promptyFile = ResolveTckPath(promptyFile);
+            expectedFile = ResolveTckPath(expectedFile);
+
+            // Use Prompty.Core library to load and parse the prompty file
+            var prompty = Prompty.Core.Prompty.Load(promptyFile);
+
+            // Convert to a dictionary format similar to the Python implementation
+            var parsed = ConvertPromptyToDict(prompty);
+
+            // Load expected results if available
+            if (!string.IsNullOrEmpty(expectedFile) && File.Exists(expectedFile))
+            {
+                var expected = LoadExpectedResults(expectedFile);
+                bool matches = CompareResults(parsed, expected);
+
+                result["status"] = matches ? "pass" : "fail";
+                result["actual"] = parsed;
+                result["expected"] = expected;
+
+                if (!matches)
+                {
+                    result["differences"] = FindDifferences(expected, parsed);
+                }
+            }
+            else
+            {
+                result["status"] = "pass";
+                result["actual"] = parsed;
+                result["message"] = "No expected results file found";
+            }
+
+            return result;
+        }
+
+        private Dictionary<string, object> RunRenderTest(Dictionary<string, object> test, Dictionary<string, object> result)
+        {
+            string promptyFile = test.GetValueOrDefault("prompty_file", "").ToString() ?? "";
+            string expectedFile = test.GetValueOrDefault("expected_rendering", "").ToString() ?? "";
+
+            // Resolve paths relative to TCK root
+            promptyFile = ResolveTckPath(promptyFile);
+            expectedFile = ResolveTckPath(expectedFile);
+
+            // Use Prompty.Core library to load the prompty file
+            var prompty = Prompty.Core.Prompty.Load(promptyFile);
+
+            // Get inputs from test data - check both "input_data" and "inputs"
+            var inputs = test.GetValueOrDefault("input_data", test.GetValueOrDefault("inputs", new Dictionary<string, object>())) as Dictionary<string, object> ?? new();
+
+            try
+            {
+                // Use Prompty.Core to render the template
+                var rendered = prompty.Prepare(inputs);
+
+                // Load expected results if available
+                if (!string.IsNullOrEmpty(expectedFile) && File.Exists(expectedFile))
+                {
+                    var expectedContent = File.ReadAllText(expectedFile);
+                    bool matches = rendered?.ToString()?.Trim() == expectedContent.Trim();
+
+                    result["status"] = matches ? "pass" : "fail";
+                    result["actual"] = rendered?.ToString() ?? "";
+                    result["expected"] = expectedContent;
+
+                    if (!matches)
+                    {
+                        result["differences"] = new Dictionary<string, object>
+                        {
+                            ["actual_length"] = rendered?.ToString()?.Length ?? 0,
+                            ["expected_length"] = expectedContent.Length,
+                            ["content_match"] = false
+                        };
+                    }
+                }
+                else
+                {
+                    result["status"] = "pass";
+                    result["actual"] = rendered?.ToString() ?? "";
+                    result["message"] = "No expected results file found";
+                }
+            }
+            catch (Exception e)
+            {
+                result["status"] = "error";
+                result["error"] = e.Message;
+                result["error_type"] = e.GetType().Name;
+            }
+
+            return result;
+        }
+
+        private Dictionary<string, object> RunExecuteTest(Dictionary<string, object> test, Dictionary<string, object> result)
+        {
+            string promptyFile = test.GetValueOrDefault("prompty_file", "").ToString() ?? "";
+            string expectedFile = test.GetValueOrDefault("expected_execution", test.GetValueOrDefault("expected_file", "")).ToString() ?? "";
+
+            // Resolve paths relative to TCK root
+            promptyFile = ResolveTckPath(promptyFile);
+            expectedFile = ResolveTckPath(expectedFile);
+
+            // Use Prompty.Core library to load the prompty file
+            var prompty = Prompty.Core.Prompty.Load(promptyFile);
+
+            // Get inputs from test data - check both "input_data" and "inputs"
+            var inputs = test.GetValueOrDefault("input_data", test.GetValueOrDefault("inputs", new Dictionary<string, object>())) as Dictionary<string, object> ?? new();
+
+            try
+            {
+                // For TCK purposes, we'll simulate execution since we don't have real AI endpoints
+                // This follows the same pattern as the Python TCK
+                var executed = prompty.Prepare(inputs);
+                var simulatedResponse = $"Simulated response for: {executed}";
+
+                result["status"] = "pass";
+                result["actual"] = simulatedResponse;
+                result["message"] = "Execution simulated (no real AI endpoint)";
+
+                // If expected file exists, compare with it
+                if (!string.IsNullOrEmpty(expectedFile) && File.Exists(expectedFile))
+                {
+                    var expectedContent = File.ReadAllText(expectedFile);
+                    result["expected"] = expectedContent;
+                    result["differences"] = new Dictionary<string, object>
+                    {
+                        ["note"] = "Execution test with simulated response",
+                        ["actual_type"] = "simulated",
+                        ["expected_type"] = "file_content"
+                    };
+                }
+            }
+            catch (Exception e)
+            {
+                result["status"] = "error";
+                result["error"] = e.Message;
+                result["error_type"] = e.GetType().Name;
+            }
+
+            return result;
+        }
+
+        private Dictionary<string, object> ConvertPromptyToDict(Prompty.Core.Prompty prompty)
+        {
+            var result = new Dictionary<string, object>();
+
+            // Add content
+            result["content"] = prompty.Content?.ToString() ?? "";
+
+            // Add model information
+            if (prompty.Model != null)
+            {
+                var modelDict = new Dictionary<string, object>
+                {
+                    ["api"] = prompty.Model.Api ?? "",
+                };
+
+                if (prompty.Model.Connection != null)
+                {
+                    modelDict["configuration"] = prompty.Model.Connection.ExtensionData ?? new Dictionary<string, object>();
+                }
+
+                if (prompty.Model.Options != null)
+                {
+                    modelDict["parameters"] = prompty.Model.Options;
+                }
+
+                result["model"] = modelDict;
+            }
+
+            // Add inputs
+            if (prompty.Inputs != null && prompty.Inputs.Any())
+            {
+                var inputsDict = new Dictionary<string, object>();
+                foreach (var input in prompty.Inputs)
+                {
+                    var inputDict = new Dictionary<string, object>
+                    {
+                        ["type"] = input.Value.Type?.ToString().ToLower() ?? "string",
+                        ["required"] = input.Value.Required
+                    };
+
+                    if (!string.IsNullOrEmpty(input.Value.Description))
+                        inputDict["description"] = input.Value.Description;
+
+                    if (input.Value.Default != null)
+                        inputDict["default"] = input.Value.Default;
+
+                    if (input.Value.Sample != null)
+                        inputDict["sample"] = input.Value.Sample;
+
+                    inputsDict[input.Key] = inputDict;
+                }
+                result["inputs"] = inputsDict;
+            }
+
+            // Add outputs
+            if (prompty.Outputs != null && prompty.Outputs.Any())
+            {
+                var outputsDict = new Dictionary<string, object>();
+                foreach (var output in prompty.Outputs)
+                {
+                    var outputDict = new Dictionary<string, object>
+                    {
+                        ["type"] = output.Value.Type?.ToString().ToLower() ?? "string"
+                    };
+
+                    if (!string.IsNullOrEmpty(output.Value.Description))
+                        outputDict["description"] = output.Value.Description;
+
+                    outputsDict[output.Key] = outputDict;
+                }
+                result["outputs"] = outputsDict;
+            }
+            else
+            {
+                result["outputs"] = new Dictionary<string, object>();
+            }
+
+            // Add sample data (create from inputs)
+            if (prompty.Inputs != null && prompty.Inputs.Any())
+            {
+                var sample = prompty.GetSample();
+                if (sample.Any())
+                {
+                    result["sample"] = sample;
+                }
+            }
+
+            // Add template information
+            if (prompty.Template != null)
+            {
+                result["template"] = new Dictionary<string, object>
+                {
+                    ["format"] = prompty.Template.Format ?? "",
+                    ["parser"] = prompty.Template.Parser ?? ""
+                };
+            }
+
+            // Add other properties
+            if (!string.IsNullOrEmpty(prompty.Name))
+                result["name"] = prompty.Name;
+
+            if (!string.IsNullOrEmpty(prompty.Description))
+                result["description"] = prompty.Description;
+
+            if (!string.IsNullOrEmpty(prompty.Version))
+                result["version"] = prompty.Version;
+
+            if (prompty.Metadata?.Authors != null && prompty.Metadata.Authors.Any())
+                result["authors"] = prompty.Metadata.Authors.ToList();
+
+            if (prompty.Metadata?.Tags != null && prompty.Metadata.Tags.Any())
+                result["tags"] = prompty.Metadata.Tags.ToList();
+
+            return result;
+        }
+
+        private string ResolveTckPath(string relativePath)
+        {
+            if (string.IsNullOrEmpty(relativePath)) return "";
+
+            if (Path.IsPathRooted(relativePath))
+                return relativePath;
+
+            return Path.Combine(tckRootPath, relativePath);
+        }
+
+        private Dictionary<string, object> LoadExpectedResults(string filePath)
+        {
+            string content = File.ReadAllText(filePath);
+            return JsonConvert.DeserializeObject<Dictionary<string, object>>(content) ?? new Dictionary<string, object>();
+        }
+
+        private bool CompareResults(Dictionary<string, object> actual, Dictionary<string, object> expected)
+        {
+            return JsonConvert.SerializeObject(actual) == JsonConvert.SerializeObject(expected);
+        }
+
+        private Dictionary<string, object> FindDifferences(Dictionary<string, object> expected, Dictionary<string, object> actual)
+        {
+            var differences = new Dictionary<string, object>();
+
+            // Find keys in expected but not in actual
+            foreach (var key in expected.Keys)
+            {
+                if (!actual.ContainsKey(key))
+                {
+                    differences[$"missing_key at `{key}`"] = $"expected={expected[key]} vs actual=None";
+                }
+                else if (!Equals(expected[key], actual[key]))
+                {
+                    differences[$"value at `{key}`"] = $"expected={expected[key]} vs actual={actual[key]}";
+                }
+            }
+
+            // Find keys in actual but not in expected
+            foreach (var key in actual.Keys)
+            {
+                if (!expected.ContainsKey(key))
+                {
+                    differences[$"extra_key at `{key}`"] = $"expected=None vs actual={actual[key]}";
+                }
+            }
+
+            return differences;
+        }
+    }
+}
diff --git a/tck/csharp/CSharpTCK.csproj b/tck/csharp/CSharpTCK.csproj
new file mode 100644
index 00000000..37eab5bd
--- /dev/null
+++ b/tck/csharp/CSharpTCK.csproj
@@ -0,0 +1,18 @@
+
+
+    Exe
+    net9.0
+    enable
+    Prompty.TCK.CSharpTCK
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tck/csharp/run-tck.sh b/tck/csharp/run-tck.sh
new file mode 100755
index 00000000..04c05924
--- /dev/null
+++ b/tck/csharp/run-tck.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+
+# C# TCK Runner
+# This script runs TCK tests for the C# runtime implementation
+
+set -e
+
+# Get the directory of this script
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TCK_ROOT="$(dirname "$SCRIPT_DIR")"
+
+# Configuration
+CSHARP_TCK="$SCRIPT_DIR/CSharpTCK.csproj"
+TEST_FILE="$TCK_ROOT/tck-tests.json"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Function to print colored output
+print_status() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+print_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+print_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+print_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Function to check if a command exists
+command_exists() {
+    command -v "$1" >/dev/null 2>&1
+}
+
+# Function to run C# TCK
+run_csharp_tck() {
+    print_status "Running C# TCK..."
+
+    if ! command_exists dotnet; then
+        print_error ".NET SDK not found."
+ return 1 + fi + + local output_file="$1" + if [ -z "$output_file" ]; then + output_file="$TCK_ROOT/results/csharp-results.json" + fi + + # Ensure output directory exists + mkdir -p "$(dirname "$output_file")" + + # Set environment variables for tests + export AZURE_OPENAI_ENDPOINT="https://test.openai.azure.com" + export AZURE_OPENAI_DEPLOYMENT="gpt-4" + export MAX_TOKENS="200" + + cd "$SCRIPT_DIR" + + # Build the project + print_status "Building C# TCK project..." + if ! dotnet build "$CSHARP_TCK" -q; then + print_error "Failed to build C# TCK project" + return 1 + fi + + # Run the tests + if dotnet run --project "$CSHARP_TCK" -- "$TEST_FILE" "$output_file"; then + print_success "C# TCK completed successfully" + return 0 + else + print_error "C# TCK failed" + return 1 + fi +} + +# Function to display help +show_help() { + cat << EOF +C# TCK Runner + +Usage: $0 [OUTPUT_FILE] + +ARGUMENTS: + OUTPUT_FILE Optional path to output results file + (default: ../results/csharp-results.json) + +EXAMPLES: + $0 # Run with default output + $0 custom-results.json # Run with custom output file + $0 /path/to/results.json # Run with absolute path + +ENVIRONMENT VARIABLES: + TCK_DEBUG Enable debug mode (true/false) + AZURE_OPENAI_ENDPOINT Override OpenAI endpoint for tests + AZURE_OPENAI_DEPLOYMENT Override OpenAI deployment name + MAX_TOKENS Override max tokens setting + +REQUIREMENTS: + - .NET SDK 9.0 or later + - All package dependencies (automatically restored) + +EOF +} + +# Parse command line arguments +if [[ $# -gt 1 ]]; then + print_error "Too many arguments" + show_help + exit 1 +fi + +if [[ $# -eq 1 ]]; then + if [[ "$1" == "--help" || "$1" == "-h" ]]; then + show_help + exit 0 + fi + OUTPUT_FILE="$1" +else + OUTPUT_FILE="" +fi + +# Set debug mode if requested +if [ "$TCK_DEBUG" = "true" ]; then + set -x +fi + +# Main execution +main() { + run_csharp_tck "$OUTPUT_FILE" +} + +# Run main function +main diff --git a/tck/expected/basic.prompty.parsed.json b/tck/expected/basic.prompty.parsed.json new file mode 100644 index 00000000..967755df --- /dev/null +++ b/tck/expected/basic.prompty.parsed.json @@ -0,0 +1,48 @@ +{ + "metadata": { + "name": "Basic Compatibility Test", + "description": "Simple test for basic functionality", + "version": "1.0", + "authors": ["tck-team"], + "tags": ["basic", "compatibility"] + }, + "model": { + "api": "chat", + "configuration": { + "type": "openai", + "model": "gpt-3.5-turbo" + }, + "parameters": { + "max_tokens": 100, + "temperature": 0.0 + }, + "response": "first" + }, + "inputs": { + "name": { + "type": "string", + "description": "User's name", + "required": true + }, + "age": { + "type": "number", + "description": "User's age", + "default": 25 + }, + "question": { + "type": "string", + "description": "Question to ask", + "required": true + } + }, + "sample": { + "name": "Alice", + "age": 30, + "question": "What is the meaning of life?" + }, + "template": { + "format": "jinja2", + "parser": "prompty" + }, + "content": "system:\nYou are a helpful assistant. Answer questions for {{name}} who is {{age}} years old.\n\nuser:\n{{question}}" +} diff --git a/tck/expected/basic.prompty.rendered.json b/tck/expected/basic.prompty.rendered.json new file mode 100644 index 00000000..4a58ddaa --- /dev/null +++ b/tck/expected/basic.prompty.rendered.json @@ -0,0 +1,10 @@ +[ + { + "role": "system", + "content": "You are a helpful assistant. Answer questions for Alice who is 30 years old." + }, + { + "role": "user", + "content": "What is the meaning of life?" 
+ } +] diff --git a/tck/expected/complex-template.prompty.parsed.json b/tck/expected/complex-template.prompty.parsed.json new file mode 100644 index 00000000..c97308ea --- /dev/null +++ b/tck/expected/complex-template.prompty.parsed.json @@ -0,0 +1,36 @@ +{ + "name": "complex-template", + "description": "Complex template with loops and conditionals", + "version": "1.0", + "model": { + "api": "openai", + "configuration": { + "type": "azure_openai", + "azure_endpoint": "https://api.openai.com/v1" + }, + "parameters": { + "model": "gpt-4", + "max_tokens": 500, + "temperature": 0.7 + } + }, + "inputs": { + "items": { + "type": "array", + "description": "List of items to process" + }, + "include_details": { + "type": "boolean", + "description": "Whether to include detailed information" + }, + "user_name": { + "type": "string", + "description": "Name of the user" + } + }, + "template": { + "type": "jinja2", + "parser": "prompty" + }, + "content": "Hello {{user_name}}!\n\n{% if include_details %}\nHere are the detailed items:\n{% for item in items %}\n- Item {{loop.index}}: {{item.name}} ({{item.category}})\n Description: {{item.description}}\n Price: ${{item.price}}\n{% endfor %}\n{% else %}\nItem summary:\n{% for item in items %}\n- {{item.name}}: ${{item.price}}\n{% endfor %}\n{% endif %}\n\nTotal items: {{items|length}}\n" +} diff --git a/tck/expected/complex-template.prompty.rendered.json b/tck/expected/complex-template.prompty.rendered.json new file mode 100644 index 00000000..e3b87880 --- /dev/null +++ b/tck/expected/complex-template.prompty.rendered.json @@ -0,0 +1,21 @@ +{ + "inputs": { + "user_name": "Alice", + "include_details": true, + "items": [ + { + "name": "Laptop", + "category": "Electronics", + "description": "High-performance laptop for developers", + "price": 1299.99 + }, + { + "name": "Mouse", + "category": "Accessories", + "description": "Ergonomic wireless mouse", + "price": 49.99 + } + ] + }, + "expected_content": "Hello Alice!\n\nHere are the detailed items:\n- Item 1: Laptop (Electronics)\n Description: High-performance laptop for developers\n Price: $1299.99\n- Item 2: Mouse (Accessories)\n Description: Ergonomic wireless mouse\n Price: $49.99\n\nTotal items: 2\n" +} diff --git a/tck/expected/conditional-template.prompty.rendered.json b/tck/expected/conditional-template.prompty.rendered.json new file mode 100644 index 00000000..d65b9ff1 --- /dev/null +++ b/tck/expected/conditional-template.prompty.rendered.json @@ -0,0 +1,8 @@ +{ + "inputs": { + "user_name": "Bob", + "show_details": false, + "items": ["Item 1", "Item 2", "Item 3"] + }, + "expected_content": "Hello Bob!\n\nSimple view: 3 items\n" +} diff --git a/tck/expected/env-vars.prompty.parsed.json b/tck/expected/env-vars.prompty.parsed.json new file mode 100644 index 00000000..94916af7 --- /dev/null +++ b/tck/expected/env-vars.prompty.parsed.json @@ -0,0 +1,29 @@ +{ + "name": "env-vars", + "description": "Test environment variable handling", + "version": "1.0", + "model": { + "api": "openai", + "configuration": { + "type": "azure_openai", + "azure_endpoint": "${env:AZURE_OPENAI_ENDPOINT}", + "api_version": "2024-02-15-preview" + }, + "parameters": { + "model": "gpt-35-turbo", + "max_tokens": 100, + "temperature": 0.2 + } + }, + "inputs": { + "question": { + "type": "string", + "description": "The user's question" + } + }, + "template": { + "type": "jinja2", + "parser": "prompty" + }, + "content": "Answer this question: {{question}}\n\nContext: This is running in environment with endpoint: 
${env:AZURE_OPENAI_ENDPOINT}\n" +} diff --git a/tck/expected/env-vars.prompty.rendered.json b/tck/expected/env-vars.prompty.rendered.json new file mode 100644 index 00000000..0ae1a825 --- /dev/null +++ b/tck/expected/env-vars.prompty.rendered.json @@ -0,0 +1,9 @@ +{ + "inputs": { + "question": "What is the capital of France?" + }, + "environment": { + "AZURE_OPENAI_ENDPOINT": "https://test-endpoint.openai.azure.com/" + }, + "expected_content": "Answer this question: What is the capital of France?\n\nContext: This is running in environment with endpoint: https://test-endpoint.openai.azure.com/\n" +} diff --git a/tck/expected/function-calling.prompty.parsed.json b/tck/expected/function-calling.prompty.parsed.json new file mode 100644 index 00000000..d3e639dd --- /dev/null +++ b/tck/expected/function-calling.prompty.parsed.json @@ -0,0 +1,52 @@ +{ + "name": "function-calling", + "description": "Test function calling capabilities", + "version": "1.0", + "model": { + "api": "openai", + "configuration": { + "type": "azure_openai", + "azure_endpoint": "https://api.openai.com/v1" + }, + "parameters": { + "model": "gpt-4", + "max_tokens": 300, + "temperature": 0.1, + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather information for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The temperature unit" + } + }, + "required": ["location"] + } + } + } + ] + } + }, + "inputs": { + "user_query": { + "type": "string", + "description": "User's weather query" + } + }, + "template": { + "type": "jinja2", + "parser": "prompty" + }, + "content": "User query: {{user_query}}\n\nPlease help the user with their weather request. You have access to a get_weather function that can provide current weather information for any location.\n" +} diff --git a/tck/expected/function-calling.prompty.rendered.json b/tck/expected/function-calling.prompty.rendered.json new file mode 100644 index 00000000..ecec1eeb --- /dev/null +++ b/tck/expected/function-calling.prompty.rendered.json @@ -0,0 +1,6 @@ +{ + "inputs": { + "user_query": "What's the weather like in Seattle?" + }, + "expected_content": "User query: What's the weather like in Seattle?\n\nPlease help the user with their weather request. 
You have access to a get_weather function that can provide current weather information for any location.\n" +} diff --git a/tck/expected/invalid-yaml.prompty.error.json b/tck/expected/invalid-yaml.prompty.error.json new file mode 100644 index 00000000..24a8f7d0 --- /dev/null +++ b/tck/expected/invalid-yaml.prompty.error.json @@ -0,0 +1,10 @@ +{ + "expected_error": "YAML parsing error", + "expected_error_type": "ParseError", + "error_message_contains": [ + "yaml", + "invalid", + "parse" + ], + "should_fail": true +} diff --git a/tck/expected/missing-input.prompty.error.json b/tck/expected/missing-input.prompty.error.json new file mode 100644 index 00000000..2c55d297 --- /dev/null +++ b/tck/expected/missing-input.prompty.error.json @@ -0,0 +1,10 @@ +{ + "expected_error": "Required input missing", + "expected_error_type": "ValidationError", + "error_message_contains": [ + "required", + "missing", + "required_field" + ], + "should_fail": true +} diff --git a/tck/interface/__pycache__/tck_interface.cpython-313.pyc b/tck/interface/__pycache__/tck_interface.cpython-313.pyc new file mode 100644 index 00000000..37a2c5a6 Binary files /dev/null and b/tck/interface/__pycache__/tck_interface.cpython-313.pyc differ diff --git a/tck/interface/tck_interface.py b/tck/interface/tck_interface.py new file mode 100644 index 00000000..d85a7502 --- /dev/null +++ b/tck/interface/tck_interface.py @@ -0,0 +1,223 @@ +""" +Prompty Test Compatibility Kit (TCK) Interface + +This module defines the common interface that all runtime implementations +must implement to participate in the TCK. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Union +from dataclasses import dataclass +from enum import Enum +import json + + +class TestResult(Enum): + PASS = "pass" + FAIL = "fail" + SKIP = "skip" + ERROR = "error" + + +@dataclass +class TCKTestResult: + test_id: str + result: TestResult + runtime: str + execution_time_ms: float + output: Optional[Any] = None + error_message: Optional[str] = None + error_type: Optional[str] = None + metadata: Optional[Dict[str, Any]] = None + + +@dataclass +class TCKComparisonResult: + test_id: str + runtimes: List[str] + compatible: bool + differences: List[Dict[str, Any]] + notes: Optional[str] = None + + +class TCKRuntimeInterface(ABC): + """ + Interface that each Prompty runtime must implement for TCK testing. + """ + + @property + @abstractmethod + def runtime_name(self) -> str: + """Return the name of this runtime (e.g., 'python', 'csharp', 'java').""" + pass + + @property + @abstractmethod + def runtime_version(self) -> str: + """Return the version of this runtime implementation.""" + pass + + @abstractmethod + def parse_prompty(self, prompty_content: str, global_config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """ + Parse a .prompty file content and return the structured representation. + + Args: + prompty_content: Raw content of the .prompty file + global_config: Optional global configuration + + Returns: + Dictionary containing the parsed prompty structure + + Raises: + Any parsing errors should be raised as exceptions + """ + pass + + @abstractmethod + def render_template(self, prompty_content: str, inputs: Dict[str, Any], + global_config: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]: + """ + Render a prompty template with the given inputs. 
+ + Args: + prompty_content: Raw content of the .prompty file + inputs: Input variables for template rendering + global_config: Optional global configuration + + Returns: + List of rendered messages (role/content pairs) + + Raises: + Any rendering errors should be raised as exceptions + """ + pass + + @abstractmethod + def validate_inputs(self, prompty_content: str, inputs: Dict[str, Any]) -> List[str]: + """ + Validate inputs against the prompty specification. + + Args: + prompty_content: Raw content of the .prompty file + inputs: Input variables to validate + + Returns: + List of validation error messages (empty if valid) + """ + pass + + @abstractmethod + def get_sample_data(self, prompty_content: str) -> Dict[str, Any]: + """ + Extract sample data from the prompty file. + + Args: + prompty_content: Raw content of the .prompty file + + Returns: + Dictionary containing sample data + """ + pass + + def normalize_output(self, output: Any) -> Any: + """ + Normalize output for cross-runtime comparison. + Override this method if runtime-specific normalization is needed. + + Args: + output: Output to normalize + + Returns: + Normalized output + """ + return output + + +class TCKTestRunner: + """ + Test runner that executes TCK tests against runtime implementations. + """ + + def __init__(self, runtimes: List[TCKRuntimeInterface]): + self.runtimes = {runtime.runtime_name: runtime for runtime in runtimes} + + def run_test(self, test_spec: Dict[str, Any], runtime_name: str) -> TCKTestResult: + """ + Run a single test against a specific runtime. + + Args: + test_spec: Test specification from tck-tests.json + runtime_name: Name of the runtime to test + + Returns: + Test result + """ + # Implementation would go here + pass + + def run_all_tests(self, test_specs: List[Dict[str, Any]], + runtime_names: Optional[List[str]] = None) -> List[TCKTestResult]: + """ + Run all tests against specified runtimes. + + Args: + test_specs: List of test specifications + runtime_names: Optional list of runtime names to test (defaults to all) + + Returns: + List of test results + """ + # Implementation would go here + pass + + def compare_runtimes(self, test_specs: List[Dict[str, Any]], + runtime_names: List[str]) -> List[TCKComparisonResult]: + """ + Compare outputs between different runtimes for compatibility verification. + + Args: + test_specs: List of test specifications + runtime_names: List of runtime names to compare + + Returns: + List of comparison results + """ + # Implementation would go here + pass + + +def normalize_for_comparison(data: Any) -> Any: + """ + Normalize data structures for cross-runtime comparison. + + This function handles differences in how different languages/runtimes + represent similar data structures (e.g., ordering, null vs None, etc.) + """ + if isinstance(data, dict): + # Sort keys for consistent ordering + return {k: normalize_for_comparison(v) for k, v in sorted(data.items())} + elif isinstance(data, list): + return [normalize_for_comparison(item) for item in data] + elif data is None: + return None + elif isinstance(data, (int, float, str, bool)): + return data + else: + # Convert other types to string representation + return str(data) + + +def load_test_specifications(file_path: str) -> List[Dict[str, Any]]: + """ + Load test specifications from a JSON file. 
+ + Args: + file_path: Path to the tck-tests.json file + + Returns: + List of test specifications + """ + with open(file_path, 'r') as f: + spec = json.load(f) + return spec['tests'] diff --git a/tck/pre-commit-check.sh b/tck/pre-commit-check.sh new file mode 100755 index 00000000..2a2a8fe5 --- /dev/null +++ b/tck/pre-commit-check.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# Pre-commit TCK validation script +# Run this before committing changes that affect the TCK + +set -e + +echo "๐Ÿ” Pre-commit TCK validation" +echo "=============================" + +# Check if we're in the right directory +if [ ! -f "run-tck.sh" ]; then + echo "โŒ Please run this script from the tck/ directory" + exit 1 +fi + +# Step 1: Validate setup +echo "1๏ธโƒฃ Validating TCK setup..." +python validate-setup.py +if [ $? -ne 0 ]; then + echo "โŒ Setup validation failed" + exit 1 +fi + +# Step 2: Run quick TCK test +echo "" +echo "2๏ธโƒฃ Running quick TCK validation..." +./run-tck.sh --runtime python +if [ $? -ne 0 ]; then + echo "โŒ Python TCK failed" + exit 1 +fi + +./run-tck.sh --runtime csharp +if [ $? -ne 0 ]; then + echo "โŒ C# TCK failed" + exit 1 +fi + +# Step 3: Generate compatibility report +echo "" +echo "3๏ธโƒฃ Generating compatibility report..." +./run-tck.sh > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "โŒ Failed to generate compatibility report" + exit 1 +fi + +# Step 4: Check compatibility threshold +echo "" +echo "4๏ธโƒฃ Checking compatibility threshold..." +python tools/check_compatibility_threshold.py reports/compatibility-report.json --threshold 60 +if [ $? -ne 0 ]; then + echo "โš ๏ธ Compatibility below threshold - please review changes" + echo " Review: reports/compatibility-report.md" + # Don't exit with error - just warn +fi + +echo "" +echo "โœ… Pre-commit validation complete!" +echo "" +echo "๐Ÿ“‹ Summary:" +echo " - TCK setup: โœ… Valid" +echo " - Python runtime: โœ… Working" +echo " - C# runtime: โœ… Working" +echo " - Compatibility: โœ… Generated" +echo "" +echo "๐Ÿš€ Ready to commit! The GitHub Actions workflow will run automatically." diff --git a/tck/python/python_tck.py b/tck/python/python_tck.py new file mode 100644 index 00000000..162d8021 --- /dev/null +++ b/tck/python/python_tck.py @@ -0,0 +1,282 @@ +""" +Python implementation of the Prompty TCK. 
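+
+It implements the TCK runtime interface for the Python prompty runtime and is
+normally invoked by the runner scripts with the shared test manifest and an
+output path, for example:
+
+    python python_tck.py ../tck-tests.json ../results/python-results.json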
+""" + +import json +import os +import sys +import time +from typing import Any, Dict, List, Optional + +# Add the prompty runtime to path +sys.path.append(os.path.join(os.path.dirname(__file__), '../../runtime/prompty')) + +import prompty +from prompty.utils import parse + + +class PythonPromptyTCK: + """Python implementation of Prompty TCK.""" + + @property + def runtime_name(self) -> str: + return "python" + + @property + def runtime_version(self) -> str: + # Get version from prompty package if available + try: + import prompty + return getattr(prompty, '__version__', '1.0.0') + except: + return "1.0.0" + + def parse_prompty(self, prompty_content: str, global_config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """Parse prompty content using Python implementation.""" + try: + # Use the prompty.parse function + parsed = parse(prompty_content) + + # Normalize the structure to match expected format + result = { + "frontmatter": parsed.get("attributes", {}), + "content": parsed.get("body", ""), + "raw_frontmatter": parsed.get("frontmatter", "") + } + + # Extract standard fields + attrs = parsed.get("attributes", {}) + if attrs: + result.update({ + "metadata": { + "name": attrs.get("name"), + "description": attrs.get("description"), + "version": attrs.get("version"), + "authors": attrs.get("authors", []), + "tags": attrs.get("tags", []) + }, + "model": attrs.get("model", {}), + "inputs": attrs.get("inputs", {}), + "outputs": attrs.get("outputs", {}), + "sample": attrs.get("sample", {}), + "template": attrs.get("template", {"format": "jinja2", "parser": "prompty"}) + }) + + return result + + except Exception as e: + raise Exception(f"Python parsing error: {str(e)}") + + def render_template(self, prompty_content: str, inputs: Dict[str, Any], + global_config: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]: + """Render template using Python implementation.""" + try: + # Load the prompty + p = prompty.load_from_content(prompty_content) + + # Prepare/render the template + rendered = prompty.prepare(p, inputs) + + # Convert to standard message format + if isinstance(rendered, list): + messages = [] + for item in rendered: + if hasattr(item, 'role') and hasattr(item, 'content'): + messages.append({ + "role": item.role, + "content": item.content + }) + elif isinstance(item, dict): + messages.append({ + "role": item.get("role", "user"), + "content": item.get("content", str(item)) + }) + else: + messages.append({ + "role": "user", + "content": str(item) + }) + return messages + else: + # Single string response + return [{"role": "user", "content": str(rendered)}] + + except Exception as e: + raise Exception(f"Python rendering error: {str(e)}") + + def validate_inputs(self, prompty_content: str, inputs: Dict[str, Any]) -> List[str]: + """Validate inputs against prompty specification.""" + try: + parsed = self.parse_prompty(prompty_content) + input_spec = parsed.get("inputs", {}) + errors = [] + + # Check required inputs + for input_name, input_def in input_spec.items(): + if isinstance(input_def, dict) and input_def.get("required", False): + if input_name not in inputs: + errors.append(f"Required input '{input_name}' is missing") + + # Check input types (basic validation) + for input_name, value in inputs.items(): + if input_name in input_spec: + input_def = input_spec[input_name] + if isinstance(input_def, dict): + expected_type = input_def.get("type") + if expected_type == "string" and not isinstance(value, str): + errors.append(f"Input '{input_name}' should be string, got 
{type(value).__name__}") + elif expected_type == "number" and not isinstance(value, (int, float)): + errors.append(f"Input '{input_name}' should be number, got {type(value).__name__}") + elif expected_type == "boolean" and not isinstance(value, bool): + errors.append(f"Input '{input_name}' should be boolean, got {type(value).__name__}") + elif expected_type == "array" and not isinstance(value, list): + errors.append(f"Input '{input_name}' should be array, got {type(value).__name__}") + elif expected_type == "object" and not isinstance(value, dict): + errors.append(f"Input '{input_name}' should be object, got {type(value).__name__}") + + return errors + + except Exception as e: + return [f"Validation error: {str(e)}"] + + def get_sample_data(self, prompty_content: str) -> Dict[str, Any]: + """Extract sample data from prompty.""" + try: + parsed = self.parse_prompty(prompty_content) + return parsed.get("sample", {}) + except Exception as e: + raise Exception(f"Python sample extraction error: {str(e)}") + + +def run_python_tck(test_file: str, output_file: str): + """ + Run TCK tests for Python implementation. + + Args: + test_file: Path to tck-tests.json + output_file: Path to write results + """ + import json + + # Load test specifications + with open(test_file, 'r') as f: + spec = json.load(f) + test_specs = spec['tests'] + + tck = PythonPromptyTCK() + results = [] + + for test_spec in test_specs: + test_id = test_spec["id"] + + try: + # Skip if this runtime is excluded + if "skip_runtimes" in test_spec and "python" in test_spec["skip_runtimes"]: + results.append({ + "test_id": test_id, + "result": "skip", + "runtime": "python", + "execution_time_ms": 0.0 + }) + continue + + start_time = time.time() + + # Read the prompty file + prompty_file = test_spec["prompty_file"] + with open(prompty_file, 'r') as f: + prompty_content = f.read() + + # Set environment variables if specified + env_vars = test_spec.get("environment_vars", {}) + old_env = {} + for key, value in env_vars.items(): + old_env[key] = os.environ.get(key) + os.environ[key] = str(value) + + try: + # Run the test based on category + category = test_spec["category"] + + if category == "specification": + # Test parsing + result = tck.parse_prompty(prompty_content) + + elif category == "functional": + # Test rendering + input_data = test_spec.get("input_data", tck.get_sample_data(prompty_content)) + result = tck.render_template(prompty_content, input_data) + + elif category == "error-handling": + # Test error conditions + input_data = test_spec.get("input_data", {}) + expected_errors = test_spec.get("expected_errors", []) + + try: + if "input_data" in test_spec: + # Test rendering with invalid input + result = tck.render_template(prompty_content, input_data) + # If we get here, the test should have failed + raise Exception("Expected error did not occur") + else: + # Test parsing invalid prompty + result = tck.parse_prompty(prompty_content) + raise Exception("Expected parsing error did not occur") + except Exception as e: + # Check if this is an expected error + error_matched = False + for expected_error in expected_errors: + import re + if re.search(expected_error["message_pattern"], str(e), re.IGNORECASE): + error_matched = True + break + + if error_matched: + result = {"expected_error": str(e)} + else: + raise e + + else: + result = {"message": f"Test category '{category}' not yet implemented"} + + execution_time = (time.time() - start_time) * 1000 + + results.append({ + "test_id": test_id, + "result": "pass", + "runtime": "python", 
+ "execution_time_ms": execution_time, + "output": result + }) + + finally: + # Restore environment variables + for key, old_value in old_env.items(): + if old_value is None: + os.environ.pop(key, None) + else: + os.environ[key] = old_value + + except Exception as e: + execution_time = (time.time() - start_time) * 1000 + results.append({ + "test_id": test_id, + "result": "error", + "runtime": "python", + "execution_time_ms": execution_time, + "error_message": str(e), + "error_type": type(e).__name__ + }) + + # Write results to file + with open(output_file, 'w') as f: + json.dump(results, f, indent=2) + + +if __name__ == "__main__": + import sys + if len(sys.argv) != 3: + print("Usage: python python_tck.py ") + sys.exit(1) + + run_python_tck(sys.argv[1], sys.argv[2]) diff --git a/tck/python/run-tck.sh b/tck/python/run-tck.sh new file mode 100755 index 00000000..3e96b0d3 --- /dev/null +++ b/tck/python/run-tck.sh @@ -0,0 +1,142 @@ +#!/bin/bash + +# Python TCK Runner +# This script runs TCK tests for the Python runtime implementation + +set -e + +# Get the directory of this script +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TCK_ROOT="$(dirname "$SCRIPT_DIR")" + +# Configuration +PYTHON_TCK="$SCRIPT_DIR/python_tck.py" +TEST_FILE="$TCK_ROOT/tck-tests.json" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Function to print colored output +print_status() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +print_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Function to check if a command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Function to run Python TCK +run_python_tck() { + print_status "Running Python TCK..." + + # Check for virtual environment Python first, then system Python + local python_cmd="" + if [ -f "$TCK_ROOT/../.venv/bin/python" ]; then + python_cmd="$TCK_ROOT/../.venv/bin/python" + elif command_exists python3; then + python_cmd="python3" + else + print_error "Python 3 not found." + return 1 + fi + + # Check if Python prompty is available + if ! "$python_cmd" -c "import prompty" 2>/dev/null; then + print_error "Python prompty runtime not found." 
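+        # Hint: the runtime sources live at ../../runtime/prompty relative to this
+        # script; installing them into the selected interpreter (for example with
+        # `pip install -e ../../runtime/prompty`) is the assumed remedy.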
+ return 1 + fi + + local output_file="$1" + if [ -z "$output_file" ]; then + output_file="$TCK_ROOT/results/python-results.json" + fi + + # Ensure output directory exists + mkdir -p "$(dirname "$output_file")" + + # Set environment variables for tests + export AZURE_OPENAI_ENDPOINT="https://test.openai.azure.com" + export AZURE_OPENAI_DEPLOYMENT="gpt-4" + export MAX_TOKENS="200" + + cd "$TCK_ROOT" + if "$python_cmd" "$PYTHON_TCK" "$TEST_FILE" "$output_file"; then + print_success "Python TCK completed successfully" + return 0 + else + print_error "Python TCK failed" + return 1 + fi +} + +# Function to display help +show_help() { + cat << EOF +Python TCK Runner + +Usage: $0 [OUTPUT_FILE] + +ARGUMENTS: + OUTPUT_FILE Optional path to output results file + (default: ../results/python-results.json) + +EXAMPLES: + $0 # Run with default output + $0 custom-results.json # Run with custom output file + $0 /path/to/results.json # Run with absolute path + +ENVIRONMENT VARIABLES: + TCK_DEBUG Enable debug mode (true/false) + AZURE_OPENAI_ENDPOINT Override OpenAI endpoint for tests + AZURE_OPENAI_DEPLOYMENT Override OpenAI deployment name + MAX_TOKENS Override max tokens setting + +EOF +} + +# Parse command line arguments +if [[ $# -gt 1 ]]; then + print_error "Too many arguments" + show_help + exit 1 +fi + +if [[ $# -eq 1 ]]; then + if [[ "$1" == "--help" || "$1" == "-h" ]]; then + show_help + exit 0 + fi + OUTPUT_FILE="$1" +else + OUTPUT_FILE="" +fi + +# Set debug mode if requested +if [ "$TCK_DEBUG" = "true" ]; then + set -x +fi + +# Main execution +main() { + run_python_tck "$OUTPUT_FILE" +} + +# Run main function +main diff --git a/tck/reports/compatibility-report.json b/tck/reports/compatibility-report.json new file mode 100644 index 00000000..722a2be1 --- /dev/null +++ b/tck/reports/compatibility-report.json @@ -0,0 +1,248 @@ +[ + { + "test_id": "basic-parsing", + "compatible": false, + "runtimes_tested": [ + "python", + "csharp" + ], + "differences": [ + { + "type": "result_status", + "runtime1": "python", + "runtime2": "csharp", + "runtime1_status": "pass", + "runtime2_status": "fail" + } + ], + "notes": null + }, + { + "test_id": "basic-rendering", + "compatible": true, + "runtimes_tested": [ + "python", + "csharp" + ], + "differences": [], + "notes": null + }, + { + "test_id": "complex-template", + "compatible": true, + "runtimes_tested": [ + "python", + "csharp" + ], + "differences": [], + "notes": null + }, + { + "test_id": "conditional-rendering", + "compatible": true, + "runtimes_tested": [ + "python", + "csharp" + ], + "differences": [], + "notes": null + }, + { + "test_id": "env-var-resolution", + "compatible": true, + "runtimes_tested": [ + "python", + "csharp" + ], + "differences": [], + "notes": null + }, + { + "test_id": "function-calling-parsing", + "compatible": false, + "runtimes_tested": [ + "python", + "csharp" + ], + "differences": [ + { + "path": "version", + "type": "missing_key", + "runtime1_value": null, + "runtime2_value": "1.0", + "runtime1": "python", + "runtime2": "csharp" + }, + { + "path": "name", + "type": "missing_key", + "runtime1_value": null, + "runtime2_value": "Function Calling Test", + "runtime1": "python", + "runtime2": "csharp" + }, + { + "path": "metadata", + "type": "extra_key", + "runtime1_value": { + "authors": [], + "description": "Test function calling capabilities", + "name": "Function Calling Test", + "tags": [], + "version": "1.0" + }, + "runtime2_value": null, + "runtime1": "python", + "runtime2": "csharp" + }, + { + "path": "frontmatter", + 
"type": "extra_key", + "runtime1_value": { + "description": "Test function calling capabilities", + "inputs": { + "question": { + "required": true, + "type": "string" + } + }, + "model": { + "api": "chat", + "configuration": { + "model": "gpt-4", + "type": "openai" + }, + "parameters": { + "max_tokens": 300, + "temperature": 0.0, + "tools": [ + { + "function": { + "description": "Get current weather for a location", + "name": "get_weather", + "parameters": { + "properties": { + "location": { + "description": "City name", + "type": "string" + }, + "unit": { + "default": "celsius", + "enum": [ + "celsius", + "fahrenheit" + ], + "type": "string" + } + }, + "required": [ + "location" + ], + "type": "object" + } + }, + "type": "function" + } + ] + } + }, + "name": "Function Calling Test", + "sample": { + "question": "What's the weather like in Paris?" + }, + "version": "1.0" + }, + "runtime2_value": null, + "runtime1": "python", + "runtime2": "csharp" + }, + { + "path": "sample", + "type": "extra_key", + "runtime1_value": { + "question": "What's the weather like in Paris?" + }, + "runtime2_value": null, + "runtime1": "python", + "runtime2": "csharp" + }, + { + "path": "template.format", + "type": "value_difference", + "runtime1_value": "jinja2", + "runtime2_value": "liquid", + "runtime1": "python", + "runtime2": "csharp" + }, + { + "path": "raw_frontmatter", + "type": "extra_key", + "runtime1_value": "\nname: \"Function Calling Test\"\ndescription: \"Test function calling capabilities\"\nversion: \"1.0\"\nmodel:\n api: chat\n configuration:\n type: openai\n model: gpt-4\n parameters:\n max_tokens: 300\n temperature: 0.0\n tools:\n - type: function\n function:\n name: get_weather\n description: Get current weather for a location\n parameters:\n type: object\n properties:\n location:\n type: string\n description: City name\n unit:\n type: string\n enum: [\"celsius\", \"fahrenheit\"]\n default: \"celsius\"\n required: [\"location\"]\nsample:\n question: \"What's the weather like in Paris?\"\ninputs:\n question:\n type: string\n required: true\n", + "runtime2_value": null, + "runtime1": "python", + "runtime2": "csharp" + }, + { + "path": "description", + "type": "missing_key", + "runtime1_value": null, + "runtime2_value": "Test function calling capabilities", + "runtime1": "python", + "runtime2": "csharp" + }, + { + "path": "model.configuration.type", + "type": "extra_key", + "runtime1_value": "openai", + "runtime2_value": null, + "runtime1": "python", + "runtime2": "csharp" + }, + { + "path": "model.parameters.max_tokens", + "type": "value_difference", + "runtime1_value": 300, + "runtime2_value": "300", + "runtime1": "python", + "runtime2": "csharp" + }, + { + "path": "model.parameters.temperature", + "type": "value_difference", + "runtime1_value": 0.0, + "runtime2_value": "0.0", + "runtime1": "python", + "runtime2": "csharp" + } + ], + "notes": null + }, + { + "test_id": "invalid-yaml", + "compatible": false, + "runtimes_tested": [ + "python", + "csharp" + ], + "differences": [ + { + "type": "result_status", + "runtime1": "python", + "runtime2": "csharp", + "runtime1_status": "pass", + "runtime2_status": "error" + } + ], + "notes": null + }, + { + "test_id": "missing-required-input", + "compatible": true, + "runtimes_tested": [ + "python", + "csharp" + ], + "differences": [], + "notes": null + } +] \ No newline at end of file diff --git a/tck/reports/compatibility-report.md b/tck/reports/compatibility-report.md new file mode 100644 index 00000000..097e48fc --- /dev/null +++ 
b/tck/reports/compatibility-report.md @@ -0,0 +1,101 @@ +# Prompty Runtime Compatibility Report + +**Overall Compatibility Rate: 62.5% (5/8)** + +## Summary + +- Total tests: 8 +- Compatible tests: 5 +- Incompatible tests: 3 + +## Test Results + +### โŒ basic-parsing +- **Status**: Incompatible +- **Runtimes tested**: python, csharp +- **Differences**: + - Result status differs: python=pass vs csharp=fail + +### โœ… basic-rendering +- **Status**: Compatible +- **Runtimes tested**: python, csharp + +### โœ… complex-template +- **Status**: Compatible +- **Runtimes tested**: python, csharp + +### โœ… conditional-rendering +- **Status**: Compatible +- **Runtimes tested**: python, csharp + +### โœ… env-var-resolution +- **Status**: Compatible +- **Runtimes tested**: python, csharp + +### โŒ function-calling-parsing +- **Status**: Incompatible +- **Runtimes tested**: python, csharp +- **Differences**: + - extra_key at `raw_frontmatter`: python= +name: "Function Calling Test" +description: "Test function calling capabilities" +version: "1.0" +model: + api: chat + configuration: + type: openai + model: gpt-4 + parameters: + max_tokens: 300 + temperature: 0.0 + tools: + - type: function + function: + name: get_weather + description: Get current weather for a location + parameters: + type: object + properties: + location: + type: string + description: City name + unit: + type: string + enum: ["celsius", "fahrenheit"] + default: "celsius" + required: ["location"] +sample: + question: "What's the weather like in Paris?" +inputs: + question: + type: string + required: true + vs csharp=None + - Value at `template.format`: python=jinja2 vs csharp=liquid + - extra_key at `metadata`: python={'authors': [], 'description': 'Test function calling capabilities', 'name': 'Function Calling Test', 'tags': [], 'version': '1.0'} vs csharp=None + - extra_key at `model.configuration.type`: python=openai vs csharp=None + - Value at `model.parameters.temperature`: python=0.0 vs csharp=0.0 + - Value at `model.parameters.max_tokens`: python=300 vs csharp=300 + - missing_key at `name`: python=None vs csharp=Function Calling Test + - missing_key at `version`: python=None vs csharp=1.0 + - extra_key at `frontmatter`: python={'description': 'Test function calling capabilities', 'inputs': {'question': {'required': True, 'type': 'string'}}, 'model': {'api': 'chat', 'configuration': {'model': 'gpt-4', 'type': 'openai'}, 'parameters': {'max_tokens': 300, 'temperature': 0.0, 'tools': [{'function': {'description': 'Get current weather for a location', 'name': 'get_weather', 'parameters': {'properties': {'location': {'description': 'City name', 'type': 'string'}, 'unit': {'default': 'celsius', 'enum': ['celsius', 'fahrenheit'], 'type': 'string'}}, 'required': ['location'], 'type': 'object'}}, 'type': 'function'}]}}, 'name': 'Function Calling Test', 'sample': {'question': "What's the weather like in Paris?"}, 'version': '1.0'} vs csharp=None + - missing_key at `description`: python=None vs csharp=Test function calling capabilities + - extra_key at `sample`: python={'question': "What's the weather like in Paris?"} vs csharp=None + +### โŒ invalid-yaml +- **Status**: Incompatible +- **Runtimes tested**: python, csharp +- **Differences**: + - Result status differs: python=pass vs csharp=error + +### โœ… missing-required-input +- **Status**: Compatible +- **Runtimes tested**: python, csharp + +## Incompatible Tests Summary + +The following tests show differences between runtimes: + +- **basic-parsing**: 1 differences +- 
**function-calling-parsing**: 11 differences +- **invalid-yaml**: 1 differences \ No newline at end of file diff --git a/tck/results/csharp-results.json b/tck/results/csharp-results.json new file mode 100644 index 00000000..f8334f23 --- /dev/null +++ b/tck/results/csharp-results.json @@ -0,0 +1,258 @@ +{ + "runtime": "csharp", + "timestamp": "2025-06-26T01:18:04Z", + "version": "1.0", + "total_tests": 9, + "results": [ + { + "test_id": "basic-parsing", + "test_type": "parse", + "runtime": "csharp", + "status": "fail", + "actual": { + "content": "system:\nYou are a helpful assistant. Answer questions for {{name}} who is {{age}} years old.\n\nuser:\n{{question}}\n", + "model": { + "api": "chat", + "configuration": { + "model": "gpt-3.5-turbo" + }, + "parameters": { + "max_tokens": "100", + "temperature": "0.0" + } + }, + "inputs": { + "name": { + "type": "string", + "required": true, + "description": "User's name" + }, + "age": { + "type": "number", + "required": true, + "description": "User's age", + "default": 25 + }, + "question": { + "type": "string", + "required": true, + "description": "Question to ask" + } + }, + "outputs": {}, + "sample": { + "age": 25 + }, + "template": { + "format": "jinja2", + "parser": "prompty" + }, + "name": "Basic Compatibility Test", + "description": "Simple test for basic functionality", + "version": "1.0", + "authors": [ + "tck-team" + ], + "tags": [ + "basic", + "compatibility" + ] + }, + "expected": { + "metadata": { + "name": "Basic Compatibility Test", + "description": "Simple test for basic functionality", + "version": "1.0", + "authors": [ + "tck-team" + ], + "tags": [ + "basic", + "compatibility" + ] + }, + "model": { + "api": "chat", + "configuration": { + "type": "openai", + "model": "gpt-3.5-turbo" + }, + "parameters": { + "max_tokens": 100, + "temperature": 0.0 + }, + "response": "first" + }, + "inputs": { + "name": { + "type": "string", + "description": "User's name", + "required": true + }, + "age": { + "type": "number", + "description": "User's age", + "default": 25 + }, + "question": { + "type": "string", + "description": "Question to ask", + "required": true + } + }, + "sample": { + "name": "Alice", + "age": 30, + "question": "What is the meaning of life?" + }, + "template": { + "format": "jinja2", + "parser": "prompty" + }, + "content": "system:\nYou are a helpful assistant. 
Answer questions for {{name}} who is {{age}} years old.\n\nuser:\n{{question}}" + }, + "differences": { + "missing_key at `metadata`": "expected={\n \"name\": \"Basic Compatibility Test\",\n \"description\": \"Simple test for basic functionality\",\n \"version\": \"1.0\",\n \"authors\": [\n \"tck-team\"\n ],\n \"tags\": [\n \"basic\",\n \"compatibility\"\n ]\n} vs actual=None", + "value at `model`": "expected={\n \"api\": \"chat\",\n \"configuration\": {\n \"type\": \"openai\",\n \"model\": \"gpt-3.5-turbo\"\n },\n \"parameters\": {\n \"max_tokens\": 100,\n \"temperature\": 0.0\n },\n \"response\": \"first\"\n} vs actual=System.Collections.Generic.Dictionary`2[System.String,System.Object]", + "value at `inputs`": "expected={\n \"name\": {\n \"type\": \"string\",\n \"description\": \"User's name\",\n \"required\": true\n },\n \"age\": {\n \"type\": \"number\",\n \"description\": \"User's age\",\n \"default\": 25\n },\n \"question\": {\n \"type\": \"string\",\n \"description\": \"Question to ask\",\n \"required\": true\n }\n} vs actual=System.Collections.Generic.Dictionary`2[System.String,System.Object]", + "value at `sample`": "expected={\n \"name\": \"Alice\",\n \"age\": 30,\n \"question\": \"What is the meaning of life?\"\n} vs actual=System.Collections.Generic.Dictionary`2[System.String,System.Object]", + "value at `template`": "expected={\n \"format\": \"jinja2\",\n \"parser\": \"prompty\"\n} vs actual=System.Collections.Generic.Dictionary`2[System.String,System.Object]", + "value at `content`": "expected=system:\nYou are a helpful assistant. Answer questions for {{name}} who is {{age}} years old.\n\nuser:\n{{question}} vs actual=system:\nYou are a helpful assistant. Answer questions for {{name}} who is {{age}} years old.\n\nuser:\n{{question}}\n", + "extra_key at `outputs`": "expected=None vs actual=System.Collections.Generic.Dictionary`2[System.String,System.Object]", + "extra_key at `name`": "expected=None vs actual=Basic Compatibility Test", + "extra_key at `description`": "expected=None vs actual=Simple test for basic functionality", + "extra_key at `version`": "expected=None vs actual=1.0", + "extra_key at `authors`": "expected=None vs actual=System.Collections.Generic.List`1[System.String]", + "extra_key at `tags`": "expected=None vs actual=System.Collections.Generic.List`1[System.String]" + }, + "execution_time_ms": 60.897 + }, + { + "test_id": "basic-rendering", + "test_type": "render", + "runtime": "csharp", + "status": "error", + "error": "Missing required input 'name'", + "error_type": "Exception", + "execution_time_ms": 2.682 + }, + { + "test_id": "env-var-resolution", + "test_type": "render", + "runtime": "csharp", + "status": "error", + "error": "Missing required input 'context'", + "error_type": "Exception", + "execution_time_ms": 1.246 + }, + { + "test_id": "complex-template", + "test_type": "render", + "runtime": "csharp", + "status": "error", + "error": "Missing required input 'user_name'", + "error_type": "Exception", + "execution_time_ms": 1.626 + }, + { + "test_id": "function-calling-parsing", + "test_type": "parse", + "runtime": "csharp", + "status": "pass", + "actual": { + "content": "system:\nYou are a helpful assistant with access to weather information.\n\nuser:\n{{question}}\n", + "model": { + "api": "chat", + "configuration": { + "model": "gpt-4" + }, + "parameters": { + "max_tokens": "300", + "temperature": "0.0", + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a location", + 
"parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City name" + }, + "unit": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit" + ], + "default": "celsius" + } + }, + "required": [ + "location" + ] + } + } + } + ] + } + }, + "inputs": { + "question": { + "type": "string", + "required": true + } + }, + "outputs": {}, + "template": { + "format": "liquid", + "parser": "prompty" + }, + "name": "Function Calling Test", + "description": "Test function calling capabilities", + "version": "1.0" + }, + "message": "No expected results file found", + "execution_time_ms": 2.368 + }, + { + "test_id": "missing-required-input", + "test_type": "render", + "runtime": "csharp", + "status": "error", + "error": "Missing required input 'name'", + "error_type": "Exception", + "execution_time_ms": 1.25 + }, + { + "test_id": "invalid-yaml", + "test_type": "parse", + "runtime": "csharp", + "status": "error", + "error": "Exception during deserialization", + "error_type": "YamlException", + "execution_time_ms": 1.458 + }, + { + "test_id": "conditional-rendering", + "test_type": "render", + "runtime": "csharp", + "status": "error", + "error": "Missing required input 'show_details'", + "error_type": "Exception", + "execution_time_ms": 1.233 + }, + { + "test_id": "missing-required-input", + "test_type": "render", + "runtime": "csharp", + "status": "error", + "error": "Missing required input 'required_field'", + "error_type": "Exception", + "execution_time_ms": 1.146 + } + ] +} \ No newline at end of file diff --git a/tck/results/python-results.json b/tck/results/python-results.json new file mode 100644 index 00000000..bd5df9c4 --- /dev/null +++ b/tck/results/python-results.json @@ -0,0 +1,293 @@ +[ + { + "test_id": "basic-parsing", + "result": "pass", + "runtime": "python", + "execution_time_ms": 1.461029052734375, + "output": { + "frontmatter": { + "name": "Basic Compatibility Test", + "description": "Simple test for basic functionality", + "version": "1.0", + "authors": [ + "tck-team" + ], + "tags": [ + "basic", + "compatibility" + ], + "model": { + "api": "chat", + "configuration": { + "type": "openai", + "model": "gpt-3.5-turbo" + }, + "parameters": { + "max_tokens": 100, + "temperature": 0.0 + } + }, + "sample": { + "name": "Alice", + "age": 30, + "question": "What is the meaning of life?" + }, + "inputs": { + "name": { + "type": "string", + "description": "User's name", + "required": true + }, + "age": { + "type": "number", + "description": "User's age", + "default": 25 + }, + "question": { + "type": "string", + "description": "Question to ask", + "required": true + } + }, + "template": { + "format": "jinja2", + "parser": "prompty" + } + }, + "content": "system:\nYou are a helpful assistant. 
Answer questions for {{name}} who is {{age}} years old.\n\nuser:\n{{question}}\n", + "raw_frontmatter": "\nname: \"Basic Compatibility Test\"\ndescription: \"Simple test for basic functionality\"\nversion: \"1.0\"\nauthors:\n - \"tck-team\"\ntags:\n - \"basic\"\n - \"compatibility\"\nmodel:\n api: chat\n configuration:\n type: openai\n model: gpt-3.5-turbo\n parameters:\n max_tokens: 100\n temperature: 0.0\nsample:\n name: \"Alice\"\n age: 30\n question: \"What is the meaning of life?\"\ninputs:\n name:\n type: string\n description: \"User's name\"\n required: true\n age:\n type: number\n description: \"User's age\"\n default: 25\n question:\n type: string\n description: \"Question to ask\"\n required: true\ntemplate:\n format: jinja2\n parser: prompty\n", + "metadata": { + "name": "Basic Compatibility Test", + "description": "Simple test for basic functionality", + "version": "1.0", + "authors": [ + "tck-team" + ], + "tags": [ + "basic", + "compatibility" + ] + }, + "model": { + "api": "chat", + "configuration": { + "type": "openai", + "model": "gpt-3.5-turbo" + }, + "parameters": { + "max_tokens": 100, + "temperature": 0.0 + } + }, + "inputs": { + "name": { + "type": "string", + "description": "User's name", + "required": true + }, + "age": { + "type": "number", + "description": "User's age", + "default": 25 + }, + "question": { + "type": "string", + "description": "Question to ask", + "required": true + } + }, + "outputs": {}, + "sample": { + "name": "Alice", + "age": 30, + "question": "What is the meaning of life?" + }, + "template": { + "format": "jinja2", + "parser": "prompty" + } + } + }, + { + "test_id": "basic-rendering", + "result": "error", + "runtime": "python", + "execution_time_ms": 1.2679100036621094, + "error_message": "Python rendering error: module 'prompty' has no attribute 'load_from_content'", + "error_type": "Exception" + }, + { + "test_id": "env-var-resolution", + "result": "error", + "runtime": "python", + "execution_time_ms": 0.9610652923583984, + "error_message": "Python rendering error: module 'prompty' has no attribute 'load_from_content'", + "error_type": "Exception" + }, + { + "test_id": "complex-template", + "result": "error", + "runtime": "python", + "execution_time_ms": 1.3890266418457031, + "error_message": "Python rendering error: module 'prompty' has no attribute 'load_from_content'", + "error_type": "Exception" + }, + { + "test_id": "function-calling-parsing", + "result": "pass", + "runtime": "python", + "execution_time_ms": 1.2710094451904297, + "output": { + "frontmatter": { + "name": "Function Calling Test", + "description": "Test function calling capabilities", + "version": "1.0", + "model": { + "api": "chat", + "configuration": { + "type": "openai", + "model": "gpt-4" + }, + "parameters": { + "max_tokens": 300, + "temperature": 0.0, + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City name" + }, + "unit": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit" + ], + "default": "celsius" + } + }, + "required": [ + "location" + ] + } + } + } + ] + } + }, + "sample": { + "question": "What's the weather like in Paris?" 
+ }, + "inputs": { + "question": { + "type": "string", + "required": true + } + } + }, + "content": "system:\nYou are a helpful assistant with access to weather information.\n\nuser:\n{{question}}\n", + "raw_frontmatter": "\nname: \"Function Calling Test\"\ndescription: \"Test function calling capabilities\"\nversion: \"1.0\"\nmodel:\n api: chat\n configuration:\n type: openai\n model: gpt-4\n parameters:\n max_tokens: 300\n temperature: 0.0\n tools:\n - type: function\n function:\n name: get_weather\n description: Get current weather for a location\n parameters:\n type: object\n properties:\n location:\n type: string\n description: City name\n unit:\n type: string\n enum: [\"celsius\", \"fahrenheit\"]\n default: \"celsius\"\n required: [\"location\"]\nsample:\n question: \"What's the weather like in Paris?\"\ninputs:\n question:\n type: string\n required: true\n", + "metadata": { + "name": "Function Calling Test", + "description": "Test function calling capabilities", + "version": "1.0", + "authors": [], + "tags": [] + }, + "model": { + "api": "chat", + "configuration": { + "type": "openai", + "model": "gpt-4" + }, + "parameters": { + "max_tokens": 300, + "temperature": 0.0, + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City name" + }, + "unit": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit" + ], + "default": "celsius" + } + }, + "required": [ + "location" + ] + } + } + } + ] + } + }, + "inputs": { + "question": { + "type": "string", + "required": true + } + }, + "outputs": {}, + "sample": { + "question": "What's the weather like in Paris?" 
+ }, + "template": { + "format": "jinja2", + "parser": "prompty" + } + } + }, + { + "test_id": "missing-required-input", + "result": "error", + "runtime": "python", + "execution_time_ms": 0.1239776611328125, + "error_message": "Python rendering error: module 'prompty' has no attribute 'load_from_content'", + "error_type": "Exception" + }, + { + "test_id": "invalid-yaml", + "result": "pass", + "runtime": "python", + "execution_time_ms": 0.2627372741699219, + "output": { + "expected_error": "Python parsing error: while parsing a flow sequence\n in \"\", line 4, column 15:\n invalid_yaml: [unclosed array\n ^\nexpected ',' or ']', but got ':'\n in \"\", line 5, column 6:\n model:\n ^" + } + }, + { + "test_id": "conditional-rendering", + "result": "error", + "runtime": "python", + "execution_time_ms": 0.8780956268310547, + "error_message": "Python rendering error: module 'prompty' has no attribute 'load_from_content'", + "error_type": "Exception" + }, + { + "test_id": "missing-required-input", + "result": "error", + "runtime": "python", + "execution_time_ms": 0.07987022399902344, + "error_message": "Python rendering error: module 'prompty' has no attribute 'load_from_content'", + "error_type": "Exception" + } +] \ No newline at end of file diff --git a/tck/run-tck.ps1 b/tck/run-tck.ps1 new file mode 100644 index 00000000..df896d10 --- /dev/null +++ b/tck/run-tck.ps1 @@ -0,0 +1,131 @@ +# PowerShell script for running TCK on Windows +param( + [string]$Runtime = "all", + [string]$Category = "all", + [switch]$Clean = $false, + [switch]$Compare = $false, + [switch]$Help = $false +) + +if ($Help) { + Write-Host "Prompty TCK Runner (Windows PowerShell)" + Write-Host "" + Write-Host "Usage: ./run-tck.ps1 [options]" + Write-Host "" + Write-Host "Options:" + Write-Host " -Runtime Run TCK for specific runtime (python, csharp, all)" + Write-Host " -Category Run specific test category (specification, functional, error-handling, all)" + Write-Host " -Clean Clean previous results before running" + Write-Host " -Compare Generate comparison report from existing results" + Write-Host " -Help Show this help message" + Write-Host "" + Write-Host "Examples:" + Write-Host " ./run-tck.ps1 # Run all runtimes" + Write-Host " ./run-tck.ps1 -Runtime python # Run Python TCK only" + Write-Host " ./run-tck.ps1 -Runtime csharp # Run C# TCK only" + Write-Host " ./run-tck.ps1 -Clean # Clean and run all" + Write-Host " ./run-tck.ps1 -Compare # Generate comparison report" + exit 0 +} + +Write-Host "[INFO] Starting Prompty TCK v1.0 (Windows PowerShell)" + +# Clean previous results if requested +if ($Clean) { + Write-Host "[INFO] Cleaning previous results..." + if (Test-Path "results") { + Remove-Item -Recurse -Force "results" + } + if (Test-Path "reports") { + Remove-Item -Recurse -Force "reports" + } +} + +# Create directories +New-Item -ItemType Directory -Force -Path "results" | Out-Null +New-Item -ItemType Directory -Force -Path "reports" | Out-Null + +# Only generate comparison report +if ($Compare) { + Write-Host "[INFO] Generating compatibility report..." 
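+    # compare_runtimes.py is run twice on the same per-runtime result files:
+    # once for the Markdown report and once (with --format json) for the JSON report.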
+ if (Test-Path "results/python-results.json" -And Test-Path "results/csharp-results.json") { + python tools/compare_runtimes.py results/python-results.json results/csharp-results.json --output reports/compatibility-report.md + python tools/compare_runtimes.py results/python-results.json results/csharp-results.json --format json --output reports/compatibility-report.json + Write-Host "[SUCCESS] Compatibility report generated" + } else { + Write-Host "[ERROR] Missing result files for comparison" + exit 1 + } + exit 0 +} + +$exitCode = 0 + +# Run Python TCK +if ($Runtime -eq "python" -or $Runtime -eq "all") { + Write-Host "[INFO] Running Python TCK..." + + Push-Location "python" + try { + python python_tck.py ../tck-tests.json ../results/python-results.json + if ($LASTEXITCODE -eq 0) { + Write-Host "[SUCCESS] Python TCK completed successfully" + } else { + Write-Host "[ERROR] Python TCK failed with exit code $LASTEXITCODE" + $exitCode = 1 + } + } catch { + Write-Host "[ERROR] Python TCK execution failed: $($_.Exception.Message)" + $exitCode = 1 + } finally { + Pop-Location + } +} + +# Run C# TCK +if ($Runtime -eq "csharp" -or $Runtime -eq "all") { + Write-Host "[INFO] Running C# TCK..." + + Push-Location "csharp" + try { + Write-Host "[INFO] Building C# TCK project..." + dotnet build + if ($LASTEXITCODE -ne 0) { + Write-Host "[ERROR] C# TCK build failed" + $exitCode = 1 + } else { + dotnet run ../tck-tests.json ../results/csharp-results.json + if ($LASTEXITCODE -eq 0) { + Write-Host "[SUCCESS] C# TCK completed successfully" + } else { + Write-Host "[ERROR] C# TCK failed with exit code $LASTEXITCODE" + $exitCode = 1 + } + } + } catch { + Write-Host "[ERROR] C# TCK execution failed: $($_.Exception.Message)" + $exitCode = 1 + } finally { + Pop-Location + } +} + +# Generate comparison report if multiple runtimes were run +if ($Runtime -eq "all" -and $exitCode -eq 0) { + Write-Host "[INFO] Generating compatibility report..." 
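+    # Comparison needs both per-runtime result files; if either one is missing,
+    # the script warns and skips the report rather than failing the whole run.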
+ if (Test-Path "results/python-results.json" -And Test-Path "results/csharp-results.json") { + python tools/compare_runtimes.py results/python-results.json results/csharp-results.json --output reports/compatibility-report.md + python tools/compare_runtimes.py results/python-results.json results/csharp-results.json --format json --output reports/compatibility-report.json + Write-Host "[SUCCESS] Compatibility report generated" + } else { + Write-Host "[WARNING] Cannot generate comparison report - missing result files" + } +} + +if ($exitCode -eq 0) { + Write-Host "[SUCCESS] All TCK tests completed successfully" +} else { + Write-Host "[ERROR] TCK execution completed with errors" +} + +exit $exitCode diff --git a/tck/run-tck.sh b/tck/run-tck.sh new file mode 100755 index 00000000..657ace72 --- /dev/null +++ b/tck/run-tck.sh @@ -0,0 +1,354 @@ +#!/bin/bash + +# Prompty Test Compatibility Kit (TCK) Master Runner +# This script coordinates TCK tests across all available runtimes and generates compatibility reports +# It delegates to individual runtime-specific runners for modular execution + +set -e + +# Configuration +TCK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RESULTS_DIR="$TCK_DIR/results" +REPORTS_DIR="$TCK_DIR/reports" + +# Runtime-specific runner scripts +PYTHON_RUNNER="$TCK_DIR/python/run-tck.sh" +CSHARP_RUNNER="$TCK_DIR/csharp/run-tck.sh" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Function to print colored output +print_status() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +print_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Function to check if a command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Function to setup environment +setup_environment() { + print_status "Setting up TCK environment..." + + # Create directories + mkdir -p "$RESULTS_DIR" + mkdir -p "$REPORTS_DIR" + + # Export environment variables for runtime-specific runners + export AZURE_OPENAI_ENDPOINT="https://test.openai.azure.com" + export AZURE_OPENAI_DEPLOYMENT="gpt-4" + export MAX_TOKENS="200" + + # Export TCK configuration variables + export TCK_OUTPUT_FORMAT="${OUTPUT_FORMAT:-json}" + export TCK_DEBUG="${DEBUG_MODE:-false}" + export TCK_PERFORMANCE_MODE="${PERFORMANCE_MODE:-false}" + export TCK_CI_MODE="${CI_MODE:-false}" + + print_success "Environment setup complete" +} + +# Function to run Python TCK using the runtime-specific runner +run_python_tck() { + print_status "Delegating to Python TCK runner..." + + if [ ! -f "$PYTHON_RUNNER" ]; then + print_error "Python TCK runner not found at $PYTHON_RUNNER" + return 1 + fi + + if [ ! -x "$PYTHON_RUNNER" ]; then + print_error "Python TCK runner is not executable: $PYTHON_RUNNER" + return 1 + fi + + local output_file="$RESULTS_DIR/python-results.json" + + if "$PYTHON_RUNNER" "$output_file"; then + print_success "Python TCK completed successfully" + return 0 + else + print_error "Python TCK failed" + return 1 + fi +} + +# Function to run C# TCK using the runtime-specific runner +run_csharp_tck() { + print_status "Delegating to C# TCK runner..." + + if [ ! -f "$CSHARP_RUNNER" ]; then + print_error "C# TCK runner not found at $CSHARP_RUNNER" + return 1 + fi + + if [ ! 
-x "$CSHARP_RUNNER" ]; then + print_error "C# TCK runner is not executable: $CSHARP_RUNNER" + return 1 + fi + + local output_file="$RESULTS_DIR/csharp-results.json" + + if "$CSHARP_RUNNER" "$output_file"; then + print_success "C# TCK completed successfully" + return 0 + else + print_error "C# TCK failed" + return 1 + fi +} + +# Function to generate comparison report +generate_report() { + print_status "Generating compatibility report..." + + local result_files=() + + # Collect available result files + for runtime in python csharp; do + local result_file="$RESULTS_DIR/${runtime}-results.json" + if [[ -f "$result_file" ]]; then + result_files+=("$result_file") + fi + done + + if [[ ${#result_files[@]} -lt 2 ]]; then + print_warning "Need at least 2 runtime results for comparison. Only found ${#result_files[@]} result file(s)." + return 1 + fi + + local report_file="$REPORTS_DIR/compatibility-report.md" + local json_report_file="$REPORTS_DIR/compatibility-report.json" + + # Determine Python command to use + local python_cmd="" + if [ -f "$TCK_DIR/../.venv/bin/python" ]; then + python_cmd="$TCK_DIR/../.venv/bin/python" + else + python_cmd="python3" + fi + + # Generate markdown report + if "$python_cmd" "$TCK_DIR/tools/compare_runtimes.py" "${result_files[@]}" --output "$report_file" --format markdown; then + print_success "Markdown report generated: $report_file" + else + print_error "Failed to generate markdown report" + return 1 + fi + + # Generate JSON report + if "$python_cmd" "$TCK_DIR/tools/compare_runtimes.py" "${result_files[@]}" --output "$json_report_file" --format json; then + print_success "JSON report generated: $json_report_file" + else + print_error "Failed to generate JSON report" + return 1 + fi + + return 0 +} + +# Function to display help +show_help() { + cat << EOF +Prompty Test Compatibility Kit (TCK) Master Runner + +This is the master runner that coordinates TCK tests across all available runtimes. +It delegates to runtime-specific runners for modular execution and generates compatibility reports. 
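+Per-runtime results are written to results/<runtime>-results.json and the combined
+reports to reports/compatibility-report.md and reports/compatibility-report.json.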
+ +Usage: $0 [OPTIONS] + +OPTIONS: + --runtime RUNTIME Run TCK for specific runtime only (python, csharp) + --quick Run quick tests only (skip slow/comprehensive tests) + --performance Enable performance monitoring and metrics collection + --debug Enable debug mode with verbose output + --help Show this help message + --version Show TCK version + --ci CI mode - optimized for continuous integration + --output-dir DIR Custom output directory for results (default: results/) + --format FORMAT Output format (json, xml, junit) (default: json) + +EXAMPLES: + $0 # Run full TCK for all runtimes + $0 --runtime python # Run TCK for Python only + $0 --runtime csharp # Run TCK for C# only + $0 --quick --ci # Quick run in CI mode + $0 --performance --debug # Full run with performance monitoring and debug + +RUNTIME-SPECIFIC RUNNERS: + python/run-tck.sh # Python TCK runner (standalone) + csharp/run-tck.sh # C# TCK runner (standalone) + +ENVIRONMENT VARIABLES: + TCK_DEBUG Enable debug mode (true/false) + TCK_PERFORMANCE_MODE Enable performance monitoring (true/false) + TCK_OUTPUT_FORMAT Default output format (json/xml/junit) + TCK_TIMEOUT Test timeout in seconds (default: 300) + TCK_CI_MODE Enable CI mode optimizations (true/false) + +SUPPORTED RUNTIMES: + python Python runtime implementation + csharp C# (.NET) runtime implementation + +For runtime-specific help, run: + python/run-tck.sh --help + csharp/run-tck.sh --help + +EOF +} + +# Parse command line arguments +RUNTIME_FILTER="" +QUICK_MODE=false +PERFORMANCE_MODE=false +DEBUG_MODE=false +CI_MODE=false +OUTPUT_FORMAT="json" +CUSTOM_OUTPUT_DIR="" + +while [[ $# -gt 0 ]]; do + case $1 in + --runtime) + RUNTIME_FILTER="$2" + shift 2 + ;; + --quick) + QUICK_MODE=true + shift + ;; + --performance) + PERFORMANCE_MODE=true + export TCK_PERFORMANCE_MODE=true + shift + ;; + --debug) + DEBUG_MODE=true + export TCK_DEBUG=true + shift + ;; + --ci) + CI_MODE=true + shift + ;; + --output-dir) + CUSTOM_OUTPUT_DIR="$2" + shift 2 + ;; + --format) + OUTPUT_FORMAT="$2" + shift 2 + ;; + --help) + show_help + exit 0 + ;; + --version) + echo "Prompty TCK v1.0" + exit 0 + ;; + *) + print_error "Unknown option: $1" + show_help + exit 1 + ;; + esac +done + +# Override output directory if specified +if [ -n "$CUSTOM_OUTPUT_DIR" ]; then + RESULTS_DIR="$CUSTOM_OUTPUT_DIR" +fi + +# Set environment variables +export TCK_OUTPUT_FORMAT="${OUTPUT_FORMAT}" +if [ "$DEBUG_MODE" = true ]; then + set -x +fi + +# CI mode optimizations +if [ "$CI_MODE" = true ]; then + export TCK_CI_MODE=true + export TCK_PARALLEL_EXECUTION=true + # Reduce verbosity in CI + if [ "$DEBUG_MODE" = false ]; then + exec > >(grep -v "^\[INFO\]" | grep -v "^Running test:") + fi +fi + +# Validate runtime if specified +if [[ -n "$RUNTIME_FILTER" && "$RUNTIME_FILTER" != "python" && "$RUNTIME_FILTER" != "csharp" ]]; then + print_error "Invalid runtime: $RUNTIME_FILTER. 
Must be one of: python, csharp" + exit 1 +fi + +# Main execution logic with runtime filtering +main() { + print_status "Starting Prompty TCK v1.0 (Master Runner)" + if [ "$QUICK_MODE" = true ]; then + print_status "Running in quick mode" + fi + if [ "$PERFORMANCE_MODE" = true ]; then + print_status "Performance monitoring enabled" + fi + if [ "$CI_MODE" = true ]; then + print_status "Running in CI mode" + fi + + setup_environment + + local python_result=0 + local csharp_result=0 + + # Run tests based on runtime filter using delegated runners + if [ -z "$RUNTIME_FILTER" ] || [ "$RUNTIME_FILTER" = "python" ]; then + print_status "Running Python TCK via runtime-specific runner..." + if ! run_python_tck; then + python_result=1 + fi + fi + + if [ -z "$RUNTIME_FILTER" ] || [ "$RUNTIME_FILTER" = "csharp" ]; then + print_status "Running C# TCK via runtime-specific runner..." + if ! run_csharp_tck; then + csharp_result=1 + fi + fi + + # Generate reports only if not filtered to single runtime + if [ -z "$RUNTIME_FILTER" ]; then + generate_report + else + print_status "Skipping report generation (single runtime mode)" + fi + + # Exit with error code if any runtime failed + local total_failures=$((python_result + csharp_result)) + if [ $total_failures -gt 0 ]; then + print_error "TCK completed with $total_failures runtime failure(s)" + exit 1 + else + print_success "All TCK tests completed successfully" + exit 0 + fi +} + +# Run main function +main "$@" diff --git a/tck/tck-schema.json b/tck/tck-schema.json new file mode 100644 index 00000000..16f8329a --- /dev/null +++ b/tck/tck-schema.json @@ -0,0 +1,86 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Prompty TCK Test Specification", + "description": "Schema for defining TCK test cases", + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "TCK specification version" + }, + "tests": { + "type": "array", + "items": { + "$ref": "#/definitions/test" + } + } + }, + "required": ["version", "tests"], + "definitions": { + "test": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique test identifier" + }, + "name": { + "type": "string", + "description": "Human-readable test name" + }, + "description": { + "type": "string", + "description": "Test description" + }, + "category": { + "type": "string", + "enum": ["specification", "functional", "integration", "error-handling", "performance"], + "description": "Test category" + }, + "prompty_file": { + "type": "string", + "description": "Path to the .prompty file to test" + }, + "input_data": { + "type": "object", + "description": "Input data for template rendering" + }, + "environment_vars": { + "type": "object", + "description": "Environment variables to set for the test" + }, + "expected_parsing": { + "type": "object", + "description": "Expected parsed structure" + }, + "expected_rendering": { + "type": "array", + "description": "Expected rendered messages" + }, + "expected_errors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "message_pattern": { + "type": "string" + } + } + }, + "description": "Expected error conditions" + }, + "skip_runtimes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Runtimes to skip for this test" + } + }, + "required": ["id", "name", "category", "prompty_file"] + } + } +} diff --git a/tck/tck-tests.json b/tck/tck-tests.json new file mode 100644 index 00000000..0c1f39db --- /dev/null +++ 
b/tck/tck-tests.json
@@ -0,0 +1,120 @@
+{
+  "version": "1.0.0",
+  "tests": [
+    {
+      "id": "basic-parsing",
+      "name": "Basic YAML Frontmatter Parsing",
+      "description": "Verify that all runtimes parse YAML frontmatter identically",
+      "category": "specification",
+      "prompty_file": "testdata/basic.prompty",
+      "expected_parsing": "expected/basic.prompty.parsed.json"
+    },
+    {
+      "id": "basic-rendering",
+      "name": "Basic Template Rendering",
+      "description": "Verify that template rendering produces identical output",
+      "category": "functional",
+      "prompty_file": "testdata/basic.prompty",
+      "input_data": {
+        "name": "Alice",
+        "age": 30,
+        "question": "What is the meaning of life?"
+      },
+      "expected_rendering": "expected/basic.prompty.rendered.json"
+    },
+    {
+      "id": "env-var-resolution",
+      "name": "Environment Variable Resolution",
+      "description": "Test that environment variables are resolved consistently",
+      "category": "functional",
+      "prompty_file": "testdata/env-vars.prompty",
+      "environment_vars": {
+        "AZURE_OPENAI_ENDPOINT": "https://test.openai.azure.com",
+        "AZURE_OPENAI_DEPLOYMENT": "gpt-4",
+        "MAX_TOKENS": "200"
+      },
+      "input_data": {
+        "context": "The weather is sunny today.",
+        "question": "What's the weather like?"
+      }
+    },
+    {
+      "id": "complex-template",
+      "name": "Complex Jinja2 Template Processing",
+      "description": "Test complex template features like loops and conditionals",
+      "category": "functional",
+      "prompty_file": "testdata/complex-template.prompty",
+      "input_data": {
+        "user_name": "Bob",
+        "items": [
+          {"name": "Apple", "price": 1.50},
+          {"name": "Banana", "price": 0.75}
+        ],
+        "show_prices": true,
+        "currency": "USD"
+      }
+    },
+    {
+      "id": "function-calling-parsing",
+      "name": "Function Calling Configuration",
+      "description": "Test that function calling configuration is parsed correctly",
+      "category": "specification",
+      "prompty_file": "testdata/function-calling.prompty"
+    },
+    {
+      "id": "missing-required-input",
+      "name": "Missing Required Input Error",
+      "description": "Test error handling for missing required inputs",
+      "category": "error-handling",
+      "prompty_file": "testdata/basic.prompty",
+      "input_data": {
+        "age": 30
+      },
+      "expected_errors": [
+        {
+          "type": "ValidationError",
+          "message_pattern": ".*required.*name.*"
+        }
+      ]
+    },
+    {
+      "id": "invalid-yaml",
+      "name": "Invalid YAML Frontmatter",
+      "description": "Test error handling for malformed YAML",
+      "category": "error-handling",
+      "prompty_file": "testdata/invalid-yaml.prompty",
+      "expected_errors": [
+        {
+          "type": "YAMLError",
+          "message_pattern": ".*invalid.*yaml.*"
+        }
+      ]
+    },
+    {
+      "id": "conditional-rendering",
+      "name": "Conditional Template Rendering",
+      "description": "Test conditional template logic with Jinja2",
+      "category": "functional",
+      "prompty_file": "testdata/conditional-template.prompty",
+      "input_data": {
+        "user_name": "Bob",
+        "show_details": false,
+        "items": ["Item 1", "Item 2", "Item 3"]
+      }
+    },
+    {
+      "id": "missing-input-validation",
+      "name": "Missing Required Input",
+      "description": "Test validation of required inputs",
+      "category": "error-handling",
+      "prompty_file": "testdata/missing-input.prompty",
+      "input_data": {},
+      "expected_errors": [
+        {
+          "type": "ValidationError",
+          "message_pattern": ".*required.*"
+        }
+      ]
+    }
+  ]
+}
diff --git a/tck/testdata/basic.prompty b/tck/testdata/basic.prompty
new file mode 100644
index 00000000..66684b70
--- /dev/null
+++ b/tck/testdata/basic.prompty
@@ -0,0 +1,43 @@
+---
+name: "Basic Compatibility Test"
+description: "Simple test for 
basic functionality" +version: "1.0" +authors: + - "tck-team" +tags: + - "basic" + - "compatibility" +model: + api: chat + configuration: + type: openai + model: gpt-3.5-turbo + parameters: + max_tokens: 100 + temperature: 0.0 +sample: + name: "Alice" + age: 30 + question: "What is the meaning of life?" +inputs: + name: + type: string + description: "User's name" + required: true + age: + type: number + description: "User's age" + default: 25 + question: + type: string + description: "Question to ask" + required: true +template: + format: jinja2 + parser: prompty +--- +system: +You are a helpful assistant. Answer questions for {{name}} who is {{age}} years old. + +user: +{{question}} diff --git a/tck/testdata/complex-template.prompty b/tck/testdata/complex-template.prompty new file mode 100644 index 00000000..f1e8fda3 --- /dev/null +++ b/tck/testdata/complex-template.prompty @@ -0,0 +1,59 @@ +--- +name: "Complex Template Test" +description: "Test complex template features including conditionals and loops" +version: "1.0" +model: + api: chat + configuration: + type: openai + model: gpt-4 + parameters: + max_tokens: 500 + temperature: 0.1 +sample: + user_name: "Bob" + items: + - name: "Apple" + price: 1.50 + - name: "Banana" + price: 0.75 + show_prices: true + currency: "USD" +inputs: + user_name: + type: string + required: true + items: + type: array + items: + type: object + properties: + name: + type: string + price: + type: number + show_prices: + type: boolean + default: false + currency: + type: string + default: "USD" +template: + format: jinja2 +--- +system: +You are a shopping assistant for {{user_name}}. + +Here are the available items: +{% for item in items %} +- {{item.name}}{% if show_prices %} - {{item.price}} {{currency}}{% endif %} +{% endfor %} + +{% if items|length > 0 %} +Total items: {{items|length}} +{% else %} +No items available. +{% endif %} + +user: +Help me with my shopping list. diff --git a/tck/testdata/conditional-template.prompty b/tck/testdata/conditional-template.prompty new file mode 100644 index 00000000..08f73b64 --- /dev/null +++ b/tck/testdata/conditional-template.prompty @@ -0,0 +1,37 @@ +--- +name: conditional-template +description: Test conditional rendering +version: 1.0 +model: + api: openai + configuration: + type: azure_openai + azure_endpoint: https://api.openai.com/v1 + parameters: + model: gpt-4 + max_tokens: 200 + temperature: 0.3 +inputs: + show_details: + type: boolean + description: Whether to show detailed information + user_name: + type: string + description: Name of the user + items: + type: array + description: List of items +template: + type: jinja2 + parser: prompty +--- +Hello {{user_name}}! + +{% if show_details %} +Detailed view: +{% for item in items %} +- {{item}} +{% endfor %} +{% else %} +Simple view: {{items|length}} items +{% endif %} diff --git a/tck/testdata/env-vars.prompty b/tck/testdata/env-vars.prompty new file mode 100644 index 00000000..b6e99b69 --- /dev/null +++ b/tck/testdata/env-vars.prompty @@ -0,0 +1,34 @@ +--- +name: "Environment Variable Test" +description: "Test environment variable resolution" +version: "1.0" +authors: + - "tck-team" +model: + api: chat + configuration: + type: azure_openai + azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} + azure_deployment: ${env:AZURE_OPENAI_DEPLOYMENT:gpt-35-turbo} + api_version: "2024-07-01-preview" + parameters: + max_tokens: ${env:MAX_TOKENS:150} + temperature: 0.0 +sample: + context: "The weather is sunny today." + question: "What's the weather like?" 
+inputs: + context: + type: string + description: "Context information" + question: + type: string + description: "User's question" +--- +system: +You are a helpful assistant. Use the following context to answer questions. + +Context: {{context}} + +user: +{{question}} diff --git a/tck/testdata/function-calling.prompty b/tck/testdata/function-calling.prompty new file mode 100644 index 00000000..69f2875f --- /dev/null +++ b/tck/testdata/function-calling.prompty @@ -0,0 +1,40 @@ +--- +name: "Function Calling Test" +description: "Test function calling capabilities" +version: "1.0" +model: + api: chat + configuration: + type: openai + model: gpt-4 + parameters: + max_tokens: 300 + temperature: 0.0 + tools: + - type: function + function: + name: get_weather + description: Get current weather for a location + parameters: + type: object + properties: + location: + type: string + description: City name + unit: + type: string + enum: ["celsius", "fahrenheit"] + default: "celsius" + required: ["location"] +sample: + question: "What's the weather like in Paris?" +inputs: + question: + type: string + required: true +--- +system: +You are a helpful assistant with access to weather information. + +user: +{{question}} diff --git a/tck/testdata/invalid-yaml.prompty b/tck/testdata/invalid-yaml.prompty new file mode 100644 index 00000000..a048740b --- /dev/null +++ b/tck/testdata/invalid-yaml.prompty @@ -0,0 +1,8 @@ +--- +name: "Invalid YAML Test" +description: "Test with malformed YAML frontmatter" +invalid_yaml: [unclosed array +model: + api: chat +--- +This should fail to parse. diff --git a/tck/testdata/missing-input.prompty b/tck/testdata/missing-input.prompty new file mode 100644 index 00000000..b42b5a69 --- /dev/null +++ b/tck/testdata/missing-input.prompty @@ -0,0 +1,22 @@ +--- +name: missing-input +description: Test with missing required input +version: 1.0 +model: + api: openai + configuration: + type: azure_openai + azure_endpoint: https://api.openai.com/v1 + parameters: + model: gpt-4 + max_tokens: 100 +inputs: + required_field: + type: string + description: This field is required + required: true +template: + type: jinja2 + parser: prompty +--- +You said: {{required_field}} diff --git a/tck/tools/check_compatibility_threshold.py b/tck/tools/check_compatibility_threshold.py new file mode 100755 index 00000000..b1679edb --- /dev/null +++ b/tck/tools/check_compatibility_threshold.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +""" +Check compatibility threshold for TCK results. + +This tool validates that the compatibility rate between runtimes +meets the minimum threshold requirements. +""" + +import argparse +import json +import sys +from pathlib import Path + + +def check_compatibility_threshold(report_file: Path, threshold: float) -> bool: + """ + Check if compatibility rate meets the threshold. 
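+    The report may be either a list of per-test comparison results (each carrying
+    a 'compatible' flag) or a summary object with an 'overall_compatibility_rate'
+    field; both formats are handled below.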
+ + Args: + report_file: Path to the compatibility report JSON file + threshold: Minimum compatibility rate (0-100) + + Returns: + True if threshold is met, False otherwise + """ + try: + with open(report_file, 'r') as f: + report = json.load(f) + + # Handle both report formats + if isinstance(report, list): + # List format: array of test results + total_tests = len(report) + compatible_tests = len([test for test in report if test.get('compatible', False)]) + compatibility_rate = (compatible_tests / total_tests) * 100 if total_tests > 0 else 0 + + print(f"Current compatibility rate: {compatibility_rate:.1f}%") + print(f"Required threshold: {threshold:.1f}%") + + if compatibility_rate >= threshold: + print(f"โœ… Compatibility threshold met ({compatibility_rate:.1f}% >= {threshold:.1f}%)") + return True + else: + print(f"โŒ Compatibility threshold not met ({compatibility_rate:.1f}% < {threshold:.1f}%)") + + # Show incompatible tests + incompatible_tests = [test for test in report if not test.get('compatible', False)] + if incompatible_tests: + print(f"\nIncompatible tests: {len(incompatible_tests)}") + for i, test in enumerate(incompatible_tests[:5]): + test_id = test.get('test_id', f'test_{i}') + differences = test.get('differences', []) + if differences: + reason = differences[0].get('type', 'Unknown reason') + else: + reason = 'No differences recorded' + print(f" - {test_id}: {reason}") + + if len(incompatible_tests) > 5: + print(f" ... and {len(incompatible_tests) - 5} more") + + return False + else: + # Object format: report with overall_compatibility_rate + compatibility_rate = report.get('overall_compatibility_rate', 0) * 100 + + print(f"Current compatibility rate: {compatibility_rate:.1f}%") + print(f"Required threshold: {threshold:.1f}%") + + if compatibility_rate >= threshold: + print(f"โœ… Compatibility threshold met ({compatibility_rate:.1f}% >= {threshold:.1f}%)") + return True + else: + print(f"โŒ Compatibility threshold not met ({compatibility_rate:.1f}% < {threshold:.1f}%)") + + # Show which tests are causing issues + if 'incompatible_tests' in report: + incompatible_count = len(report['incompatible_tests']) + print(f"\nIncompatible tests: {incompatible_count}") + + # Show a few examples + for i, test in enumerate(report['incompatible_tests'][:5]): + test_id = test.get('test_id', f'test_{i}') + reason = test.get('reason', 'Unknown reason') + print(f" - {test_id}: {reason}") + + if incompatible_count > 5: + print(f" ... 
and {incompatible_count - 5} more") + + return False + + except FileNotFoundError: + print(f"โŒ Report file not found: {report_file}") + return False + except json.JSONDecodeError as e: + print(f"โŒ Invalid JSON in report file: {e}") + return False + except Exception as e: + print(f"โŒ Error reading report: {e}") + return False + + +def main(): + parser = argparse.ArgumentParser(description="Check TCK compatibility threshold") + parser.add_argument("report_file", type=Path, help="Path to compatibility report JSON file") + parser.add_argument("--threshold", type=float, default=80.0, + help="Minimum compatibility rate threshold (default: 80.0)") + + args = parser.parse_args() + + if not args.report_file.exists(): + print(f"โŒ Report file does not exist: {args.report_file}") + sys.exit(1) + + if not (0 <= args.threshold <= 100): + print("โŒ Threshold must be between 0 and 100") + sys.exit(1) + + success = check_compatibility_threshold(args.report_file, args.threshold) + sys.exit(0 if success else 1) + + +if __name__ == "__main__": + main() diff --git a/tck/tools/compare_runtimes.py b/tck/tools/compare_runtimes.py new file mode 100644 index 00000000..b5918560 --- /dev/null +++ b/tck/tools/compare_runtimes.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python3 +""" +Cross-runtime comparison tool for Prompty TCK results. +""" + +import json +import argparse +import sys +from typing import Dict, List, Any, Optional +from dataclasses import dataclass +from pathlib import Path +import difflib + + +@dataclass +class RuntimeResult: + runtime: str + test_id: str + result: str + execution_time_ms: float + output: Any + error_message: Optional[str] = None + error_type: Optional[str] = None + + +@dataclass +class ComparisonResult: + test_id: str + compatible: bool + runtimes_tested: List[str] + differences: List[Dict[str, Any]] + notes: Optional[str] = None + + +def normalize_output(output: Any) -> Any: + """Normalize output for cross-runtime comparison.""" + if isinstance(output, dict): + # Sort keys for consistent ordering + return {k: normalize_output(v) for k, v in sorted(output.items())} + elif isinstance(output, list): + return [normalize_output(item) for item in output] + elif output is None: + return None + elif isinstance(output, (int, float, str, bool)): + return output + else: + # Convert other types to string representation + return str(output) + + +def load_runtime_results(file_path: str) -> List[RuntimeResult]: + """Load test results from a runtime result file.""" + with open(file_path, 'r') as f: + data = json.load(f) + + # Handle both formats: simple array and metadata wrapper + runtime_name = 'unknown' + if isinstance(data, list): + # Simple array format (like Python TCK) + test_results = data + elif isinstance(data, dict) and 'results' in data: + # Metadata wrapper format (like C# TCK) + test_results = data['results'] + # Extract runtime from metadata if available + if 'metadata' in data and 'runtime' in data['metadata']: + runtime_name = data['metadata']['runtime'] + else: + raise ValueError(f"Unknown result format in {file_path}") + + results = [] + for item in test_results: + # Handle different field names between implementations + runtime = item.get('runtime') or runtime_name + test_id = item.get('test_id') or item.get('id', 'unknown') + result = item.get('result') or item.get('status', 'unknown') + execution_time = item.get('execution_time_ms') or item.get('execution_time_ms', 0) + output = item.get('output') or item.get('actual') + error_message = item.get('error_message') or 
item.get('error') or item.get('message') + error_type = item.get('error_type') + + results.append(RuntimeResult( + runtime=runtime, + test_id=test_id, + result=result, + execution_time_ms=execution_time, + output=output, + error_message=error_message, + error_type=error_type + )) + + return results + + +def compare_outputs(output1: Any, output2: Any, path: str = "") -> List[Dict[str, Any]]: + """Compare two outputs and return list of differences.""" + differences = [] + + norm1 = normalize_output(output1) + norm2 = normalize_output(output2) + + if norm1 != norm2: + if isinstance(norm1, dict) and isinstance(norm2, dict): + # Compare dictionaries + all_keys = set(norm1.keys()) | set(norm2.keys()) + for key in all_keys: + key_path = f"{path}.{key}" if path else key + if key not in norm1: + differences.append({ + "path": key_path, + "type": "missing_key", + "runtime1_value": None, + "runtime2_value": norm2[key] + }) + elif key not in norm2: + differences.append({ + "path": key_path, + "type": "extra_key", + "runtime1_value": norm1[key], + "runtime2_value": None + }) + else: + differences.extend(compare_outputs(norm1[key], norm2[key], key_path)) + + elif isinstance(norm1, list) and isinstance(norm2, list): + # Compare lists + max_len = max(len(norm1), len(norm2)) + for i in range(max_len): + item_path = f"{path}[{i}]" if path else f"[{i}]" + if i >= len(norm1): + differences.append({ + "path": item_path, + "type": "missing_item", + "runtime1_value": None, + "runtime2_value": norm2[i] + }) + elif i >= len(norm2): + differences.append({ + "path": item_path, + "type": "extra_item", + "runtime1_value": norm1[i], + "runtime2_value": None + }) + else: + differences.extend(compare_outputs(norm1[i], norm2[i], item_path)) + + else: + # Direct value comparison + differences.append({ + "path": path or "root", + "type": "value_difference", + "runtime1_value": norm1, + "runtime2_value": norm2 + }) + + return differences + + +def compare_runtimes(runtime_results: Dict[str, List[RuntimeResult]]) -> List[ComparisonResult]: + """Compare results across multiple runtimes.""" + if len(runtime_results) < 2: + print("Need at least 2 runtimes for comparison") + return [] + + # Get all test IDs + all_test_ids = set() + for results in runtime_results.values(): + all_test_ids.update(result.test_id for result in results) + + # Create lookup dictionaries + runtime_lookups = {} + for runtime_name, results in runtime_results.items(): + runtime_lookups[runtime_name] = {result.test_id: result for result in results} + + comparison_results = [] + runtimes_list = list(runtime_results.keys()) + + for test_id in sorted(all_test_ids): + # Get results for this test from all runtimes + test_results = {} + runtimes_tested = [] + + for runtime_name in runtimes_list: + if test_id in runtime_lookups[runtime_name]: + test_results[runtime_name] = runtime_lookups[runtime_name][test_id] + runtimes_tested.append(runtime_name) + + if len(test_results) < 2: + # Not enough runtimes have this test + comparison_results.append(ComparisonResult( + test_id=test_id, + compatible=False, + runtimes_tested=runtimes_tested, + differences=[], + notes=f"Test only available in {len(test_results)} runtime(s): {', '.join(runtimes_tested)}" + )) + continue + + # Compare all pairs + all_compatible = True + all_differences = [] + + runtime_pairs = [(runtimes_list[i], runtimes_list[j]) + for i in range(len(runtimes_list)) + for j in range(i + 1, len(runtimes_list)) + if runtimes_list[i] in test_results and runtimes_list[j] in test_results] + + for runtime1, 
runtime2 in runtime_pairs: + result1 = test_results[runtime1] + result2 = test_results[runtime2] + + # Compare result status + if result1.result != result2.result: + all_compatible = False + all_differences.append({ + "type": "result_status", + "runtime1": runtime1, + "runtime2": runtime2, + "runtime1_status": result1.result, + "runtime2_status": result2.result + }) + + # If both passed, compare outputs + if result1.result == "pass" and result2.result == "pass": + output_diffs = compare_outputs(result1.output, result2.output) + if output_diffs: + all_compatible = False + for diff in output_diffs: + diff.update({ + "runtime1": runtime1, + "runtime2": runtime2 + }) + all_differences.extend(output_diffs) + + # If both errored, compare error types + elif result1.result == "error" and result2.result == "error": + if result1.error_type != result2.error_type: + all_compatible = False + all_differences.append({ + "type": "error_type", + "runtime1": runtime1, + "runtime2": runtime2, + "runtime1_error_type": result1.error_type, + "runtime2_error_type": result2.error_type + }) + + comparison_results.append(ComparisonResult( + test_id=test_id, + compatible=all_compatible, + runtimes_tested=runtimes_tested, + differences=all_differences + )) + + return comparison_results + + +def generate_report(comparison_results: List[ComparisonResult], output_file: Optional[str] = None): + """Generate a compatibility report.""" + total_tests = len(comparison_results) + compatible_tests = sum(1 for result in comparison_results if result.compatible) + compatibility_rate = (compatible_tests / total_tests * 100) if total_tests > 0 else 0 + + report_lines = [ + "# Prompty Runtime Compatibility Report", + "", + f"**Overall Compatibility Rate: {compatibility_rate:.1f}% ({compatible_tests}/{total_tests})**", + "", + "## Summary", + "", + f"- Total tests: {total_tests}", + f"- Compatible tests: {compatible_tests}", + f"- Incompatible tests: {total_tests - compatible_tests}", + "", + "## Test Results", + "" + ] + + for result in comparison_results: + status_icon = "โœ…" if result.compatible else "โŒ" + report_lines.append(f"### {status_icon} {result.test_id}") + report_lines.append(f"- **Status**: {'Compatible' if result.compatible else 'Incompatible'}") + report_lines.append(f"- **Runtimes tested**: {', '.join(result.runtimes_tested)}") + + if result.notes: + report_lines.append(f"- **Notes**: {result.notes}") + + if result.differences: + report_lines.append("- **Differences**:") + for diff in result.differences: + if diff["type"] == "result_status": + report_lines.append(f" - Result status differs: {diff['runtime1']}={diff['runtime1_status']} vs {diff['runtime2']}={diff['runtime2_status']}") + elif diff["type"] == "error_type": + report_lines.append(f" - Error type differs: {diff['runtime1']}={diff['runtime1_error_type']} vs {diff['runtime2']}={diff['runtime2_error_type']}") + elif diff["type"] == "value_difference": + report_lines.append(f" - Value at `{diff['path']}`: {diff['runtime1']}={diff['runtime1_value']} vs {diff['runtime2']}={diff['runtime2_value']}") + elif diff["type"] in ["missing_key", "extra_key", "missing_item", "extra_item"]: + report_lines.append(f" - {diff['type']} at `{diff['path']}`: {diff['runtime1']}={diff['runtime1_value']} vs {diff['runtime2']}={diff['runtime2_value']}") + + report_lines.append("") + + # Add incompatible tests summary + incompatible_tests = [r for r in comparison_results if not r.compatible] + if incompatible_tests: + report_lines.extend([ + "## Incompatible Tests Summary", + "", + 
"The following tests show differences between runtimes:", + "" + ]) + + for result in incompatible_tests: + report_lines.append(f"- **{result.test_id}**: {len(result.differences)} differences") + + report_content = "\n".join(report_lines) + + if output_file: + with open(output_file, 'w') as f: + f.write(report_content) + print(f"Report written to {output_file}") + else: + print(report_content) + + +def main(): + parser = argparse.ArgumentParser(description="Compare Prompty TCK results across runtimes") + parser.add_argument("result_files", nargs="+", help="Runtime result JSON files") + parser.add_argument("--output", "-o", help="Output report file (default: stdout)") + parser.add_argument("--format", choices=["markdown", "json"], default="markdown", help="Output format") + + args = parser.parse_args() + + if len(args.result_files) < 2: + print("Error: Need at least 2 runtime result files for comparison") + sys.exit(1) + + # Load results from all files + runtime_results = {} + for file_path in args.result_files: + try: + results = load_runtime_results(file_path) + if results: + runtime_name = results[0].runtime + runtime_results[runtime_name] = results + print(f"Loaded {len(results)} results for {runtime_name}") + except Exception as e: + print(f"Error loading {file_path}: {e}") + sys.exit(1) + + # Compare runtimes + comparison_results = compare_runtimes(runtime_results) + + if args.format == "json": + # JSON output + json_output = [] + for result in comparison_results: + json_output.append({ + "test_id": result.test_id, + "compatible": result.compatible, + "runtimes_tested": result.runtimes_tested, + "differences": result.differences, + "notes": result.notes + }) + + if args.output: + with open(args.output, 'w') as f: + json.dump(json_output, f, indent=2) + else: + print(json.dumps(json_output, indent=2)) + else: + # Markdown report + generate_report(comparison_results, args.output) + + +if __name__ == "__main__": + main() diff --git a/tck/tools/performance_monitor.py b/tck/tools/performance_monitor.py new file mode 100644 index 00000000..811abcba --- /dev/null +++ b/tck/tools/performance_monitor.py @@ -0,0 +1,337 @@ +#!/usr/bin/env python3 +""" +Prompty TCK Performance Monitor + +This script analyzes TCK results to track performance metrics and detect regressions. 
+""" + +import json +import argparse +import os +import sys +import time +from pathlib import Path +from typing import Dict, List, Any +import statistics + +class PerformanceMonitor: + def __init__(self, results_dir: str, baseline_file: str = None): + self.results_dir = Path(results_dir) + self.baseline_file = baseline_file + self.metrics = {} + + def analyze_results(self) -> Dict[str, Any]: + """Analyze TCK results for performance metrics.""" + + performance_data = { + 'timestamp': time.time(), + 'runtimes': {}, + 'summary': {} + } + + # Process each runtime's results + for runtime in ['python', 'csharp', 'java']: + result_file = self.results_dir / f"{runtime}-results.json" + if result_file.exists(): + runtime_metrics = self._analyze_runtime_results(result_file, runtime) + performance_data['runtimes'][runtime] = runtime_metrics + + # Calculate cross-runtime summary + performance_data['summary'] = self._calculate_summary(performance_data['runtimes']) + + return performance_data + + def _analyze_runtime_results(self, result_file: Path, runtime: str) -> Dict[str, Any]: + """Analyze results for a specific runtime.""" + + with open(result_file, 'r') as f: + results = json.load(f) + + metrics = { + 'runtime': runtime, + 'total_tests': 0, + 'passed_tests': 0, + 'failed_tests': 0, + 'error_tests': 0, + 'execution_times': [], + 'memory_usage': [], + 'test_breakdown': {} + } + + if 'results' in results: + for test_result in results['results']: + metrics['total_tests'] += 1 + + status = test_result.get('status', 'unknown') + if status == 'pass': + metrics['passed_tests'] += 1 + elif status == 'fail': + metrics['failed_tests'] += 1 + elif status == 'error': + metrics['error_tests'] += 1 + + # Collect execution time if available + exec_time = test_result.get('execution_time_ms', 0) + if exec_time > 0: + metrics['execution_times'].append(exec_time) + + # Collect memory usage if available + memory = test_result.get('memory_usage_mb', 0) + if memory > 0: + metrics['memory_usage'].append(memory) + + # Test type breakdown + test_type = test_result.get('type', 'unknown') + if test_type not in metrics['test_breakdown']: + metrics['test_breakdown'][test_type] = {'count': 0, 'passed': 0} + metrics['test_breakdown'][test_type]['count'] += 1 + if status == 'pass': + metrics['test_breakdown'][test_type]['passed'] += 1 + + # Calculate statistics + if metrics['execution_times']: + metrics['avg_execution_time'] = statistics.mean(metrics['execution_times']) + metrics['median_execution_time'] = statistics.median(metrics['execution_times']) + metrics['max_execution_time'] = max(metrics['execution_times']) + metrics['min_execution_time'] = min(metrics['execution_times']) + + if metrics['memory_usage']: + metrics['avg_memory_usage'] = statistics.mean(metrics['memory_usage']) + metrics['peak_memory_usage'] = max(metrics['memory_usage']) + + metrics['success_rate'] = ( + metrics['passed_tests'] / metrics['total_tests'] + if metrics['total_tests'] > 0 else 0 + ) + + return metrics + + def _calculate_summary(self, runtime_metrics: Dict[str, Dict]) -> Dict[str, Any]: + """Calculate cross-runtime summary metrics.""" + + summary = { + 'total_runtimes': len(runtime_metrics), + 'overall_success_rate': 0, + 'fastest_runtime': None, + 'slowest_runtime': None, + 'most_memory_efficient': None, + 'compatibility_matrix': {} + } + + if not runtime_metrics: + return summary + + # Calculate overall success rate + total_tests = sum(m['total_tests'] for m in runtime_metrics.values()) + total_passed = sum(m['passed_tests'] for m in 
runtime_metrics.values()) + summary['overall_success_rate'] = total_passed / total_tests if total_tests > 0 else 0 + + # Find fastest/slowest runtimes + avg_times = {} + for runtime, metrics in runtime_metrics.items(): + if 'avg_execution_time' in metrics: + avg_times[runtime] = metrics['avg_execution_time'] + + if avg_times: + summary['fastest_runtime'] = min(avg_times, key=avg_times.get) + summary['slowest_runtime'] = max(avg_times, key=avg_times.get) + + # Find most memory efficient + avg_memory = {} + for runtime, metrics in runtime_metrics.items(): + if 'avg_memory_usage' in metrics: + avg_memory[runtime] = metrics['avg_memory_usage'] + + if avg_memory: + summary['most_memory_efficient'] = min(avg_memory, key=avg_memory.get) + + # Compatibility matrix + for runtime, metrics in runtime_metrics.items(): + summary['compatibility_matrix'][runtime] = { + 'success_rate': metrics['success_rate'], + 'total_tests': metrics['total_tests'], + 'test_types': list(metrics['test_breakdown'].keys()) + } + + return summary + + def compare_with_baseline(self, current_data: Dict[str, Any]) -> Dict[str, Any]: + """Compare current results with baseline if available.""" + + if not self.baseline_file or not os.path.exists(self.baseline_file): + return {'baseline_available': False} + + with open(self.baseline_file, 'r') as f: + baseline_data = json.load(f) + + comparison = { + 'baseline_available': True, + 'regressions': [], + 'improvements': [], + 'performance_delta': {} + } + + # Compare runtime performance + for runtime in current_data['runtimes']: + if runtime in baseline_data.get('runtimes', {}): + current_metrics = current_data['runtimes'][runtime] + baseline_metrics = baseline_data['runtimes'][runtime] + + # Compare execution time + current_time = current_metrics.get('avg_execution_time', 0) + baseline_time = baseline_metrics.get('avg_execution_time', 0) + + if baseline_time > 0: + time_delta = ((current_time - baseline_time) / baseline_time) * 100 + comparison['performance_delta'][runtime] = { + 'execution_time_change_percent': time_delta + } + + # Flag significant regressions/improvements + if time_delta > 20: # 20% slower + comparison['regressions'].append({ + 'runtime': runtime, + 'type': 'execution_time', + 'change_percent': time_delta + }) + elif time_delta < -20: # 20% faster + comparison['improvements'].append({ + 'runtime': runtime, + 'type': 'execution_time', + 'change_percent': abs(time_delta) + }) + + # Compare success rate + current_success = current_metrics.get('success_rate', 0) + baseline_success = baseline_metrics.get('success_rate', 0) + + if current_success < baseline_success: + comparison['regressions'].append({ + 'runtime': runtime, + 'type': 'success_rate', + 'current': current_success, + 'baseline': baseline_success + }) + + return comparison + + def generate_report(self, output_file: str = None) -> str: + """Generate a comprehensive performance report.""" + + performance_data = self.analyze_results() + comparison = self.compare_with_baseline(performance_data) + + report = [] + report.append("# Prompty TCK Performance Report") + report.append(f"Generated at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + report.append("") + + # Summary section + summary = performance_data['summary'] + report.append("## Summary") + report.append(f"- Total Runtimes Tested: {summary['total_runtimes']}") + report.append(f"- Overall Success Rate: {summary['overall_success_rate']:.2%}") + + if summary.get('fastest_runtime'): + report.append(f"- Fastest Runtime: {summary['fastest_runtime']}") + if 
summary.get('slowest_runtime'): + report.append(f"- Slowest Runtime: {summary['slowest_runtime']}") + if summary.get('most_memory_efficient'): + report.append(f"- Most Memory Efficient: {summary['most_memory_efficient']}") + + report.append("") + + # Runtime details + report.append("## Runtime Performance Details") + for runtime, metrics in performance_data['runtimes'].items(): + report.append(f"### {runtime.title()} Runtime") + report.append(f"- Tests: {metrics['passed_tests']}/{metrics['total_tests']} passed") + report.append(f"- Success Rate: {metrics['success_rate']:.2%}") + + if 'avg_execution_time' in metrics: + report.append(f"- Average Execution Time: {metrics['avg_execution_time']:.2f}ms") + report.append(f"- Median Execution Time: {metrics['median_execution_time']:.2f}ms") + report.append(f"- Execution Time Range: {metrics['min_execution_time']:.2f}ms - {metrics['max_execution_time']:.2f}ms") + + if 'avg_memory_usage' in metrics: + report.append(f"- Average Memory Usage: {metrics['avg_memory_usage']:.2f}MB") + report.append(f"- Peak Memory Usage: {metrics['peak_memory_usage']:.2f}MB") + + report.append("") + + # Baseline comparison + if comparison['baseline_available']: + report.append("## Baseline Comparison") + + if comparison['regressions']: + report.append("### โš ๏ธ Performance Regressions") + for regression in comparison['regressions']: + if regression['type'] == 'execution_time': + report.append(f"- {regression['runtime']}: {regression['change_percent']:.1f}% slower") + elif regression['type'] == 'success_rate': + report.append(f"- {regression['runtime']}: Success rate dropped from {regression['baseline']:.2%} to {regression['current']:.2%}") + report.append("") + + if comparison['improvements']: + report.append("### โœ… Performance Improvements") + for improvement in comparison['improvements']: + report.append(f"- {improvement['runtime']}: {improvement['change_percent']:.1f}% faster") + report.append("") + + if not comparison['regressions'] and not comparison['improvements']: + report.append("- No significant performance changes detected") + report.append("") + + # Detailed metrics + report.append("## Detailed Metrics") + report.append("```json") + report.append(json.dumps(performance_data, indent=2)) + report.append("```") + + report_text = "\n".join(report) + + if output_file: + with open(output_file, 'w') as f: + f.write(report_text) + print(f"Performance report written to: {output_file}") + + return report_text + +def main(): + parser = argparse.ArgumentParser(description="Prompty TCK Performance Monitor") + parser.add_argument("--results-dir", default="results", + help="Directory containing TCK results") + parser.add_argument("--baseline", + help="Baseline performance data file for comparison") + parser.add_argument("--output", + help="Output file for performance report") + parser.add_argument("--format", choices=['markdown', 'json'], default='markdown', + help="Output format") + parser.add_argument("--save-baseline", + help="Save current results as new baseline") + + args = parser.parse_args() + + if not os.path.exists(args.results_dir): + print(f"Error: Results directory '{args.results_dir}' not found") + sys.exit(1) + + monitor = PerformanceMonitor(args.results_dir, args.baseline) + + if args.format == 'json': + data = monitor.analyze_results() + output = json.dumps(data, indent=2) + else: + output = monitor.generate_report(args.output) + + if args.save_baseline: + data = monitor.analyze_results() + with open(args.save_baseline, 'w') as f: + json.dump(data, f, 
indent=2) + print(f"Baseline saved to: {args.save_baseline}") + + if not args.output: + print(output) + +if __name__ == "__main__": + main() diff --git a/tck/tools/requirements.txt b/tck/tools/requirements.txt new file mode 100644 index 00000000..058bf213 --- /dev/null +++ b/tck/tools/requirements.txt @@ -0,0 +1,29 @@ +# Prompty TCK Tools Requirements +# Install with: pip install -r requirements.txt + +# Core dependencies +pyyaml>=6.0 +jinja2>=3.1.0 +jsonschema>=4.17.0 + +# Data analysis and reporting +pandas>=1.5.0 +matplotlib>=3.6.0 +seaborn>=0.12.0 + +# CLI and utilities +click>=8.1.0 +rich>=13.0.0 +tabulate>=0.9.0 + +# Testing and validation +requests>=2.28.0 +jsonpath-ng>=1.5.0 + +# Optional: Performance monitoring +psutil>=5.9.0 +memory-profiler>=0.60.0 + +# Optional: Advanced reporting +plotly>=5.11.0 +jinja2>=3.1.0 diff --git a/tck/validate-setup.py b/tck/validate-setup.py new file mode 100755 index 00000000..f4e50741 --- /dev/null +++ b/tck/validate-setup.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +""" +Validate TCK workflow setup. + +This script checks that all necessary components are in place +for the GitHub Actions TCK workflow to run successfully. +""" + +import os +import sys +from pathlib import Path + + +def check_file_exists(path: Path, description: str) -> bool: + """Check if a file exists and report the result.""" + if path.exists(): + print(f"โœ… {description}: {path}") + return True + else: + print(f"โŒ {description}: {path} (NOT FOUND)") + return False + + +def check_executable(path: Path, description: str) -> bool: + """Check if a file exists and is executable.""" + if path.exists() and os.access(path, os.X_OK): + print(f"โœ… {description}: {path} (executable)") + return True + elif path.exists(): + print(f"โš ๏ธ {description}: {path} (exists but not executable)") + return False + else: + print(f"โŒ {description}: {path} (NOT FOUND)") + return False + + +def main(): + """Main validation function.""" + print("๐Ÿ” Validating TCK Workflow Setup") + print("=" * 50) + + # Get the repository root (should be parent of tck directory) + tck_dir = Path(__file__).parent + repo_root = tck_dir.parent + + issues = [] + + # Check workflow file + workflow_file = repo_root / ".github" / "workflows" / "tck.yml" + if not check_file_exists(workflow_file, "GitHub Actions workflow"): + issues.append("Missing workflow file") + + # Check TCK runners + main_runner = tck_dir / "run-tck.sh" + if not check_executable(main_runner, "Main TCK runner"): + issues.append("Main runner not executable") + + ps_runner = tck_dir / "run-tck.ps1" + if not check_file_exists(ps_runner, "PowerShell TCK runner"): + issues.append("Missing PowerShell runner") + + # Check runtime-specific runners + python_runner = tck_dir / "python" / "run-tck.sh" + if not check_executable(python_runner, "Python TCK runner"): + issues.append("Python runner not executable") + + csharp_runner = tck_dir / "csharp" / "run-tck.sh" + if not check_executable(csharp_runner, "C# TCK runner"): + issues.append("C# runner not executable") + + # Check TCK test data + test_spec = tck_dir / "tck-tests.json" + if not check_file_exists(test_spec, "TCK test specification"): + issues.append("Missing test specification") + + # Check runtime implementations + python_tck = tck_dir / "python" / "python_tck.py" + if not check_file_exists(python_tck, "Python TCK implementation"): + issues.append("Missing Python TCK implementation") + + csharp_tck = tck_dir / "csharp" / "CSharpTCK.cs" + if not check_file_exists(csharp_tck, "C# TCK implementation"): 
+ issues.append("Missing C# TCK implementation") + + csharp_proj = tck_dir / "csharp" / "CSharpTCK.csproj" + if not check_file_exists(csharp_proj, "C# project file"): + issues.append("Missing C# project file") + + # Check runtime libraries + python_runtime = repo_root / "runtime" / "prompty" + if not check_file_exists(python_runtime, "Python runtime directory"): + issues.append("Missing Python runtime") + + csharp_runtime = repo_root / "runtime" / "promptycs" + if not check_file_exists(csharp_runtime, "C# runtime directory"): + issues.append("Missing C# runtime") + + # Check comparison tools + compare_tool = tck_dir / "tools" / "compare_runtimes.py" + if not check_file_exists(compare_tool, "Runtime comparison tool"): + issues.append("Missing comparison tool") + + threshold_tool = tck_dir / "tools" / "check_compatibility_threshold.py" + if not check_executable(threshold_tool, "Compatibility threshold checker"): + issues.append("Threshold checker not executable") + + # Check directories + required_dirs = [ + (tck_dir / "testdata", "Test data directory"), + (tck_dir / "expected", "Expected results directory"), + (tck_dir / "results", "Results directory (may be created)"), + (tck_dir / "reports", "Reports directory (may be created)") + ] + + for dir_path, description in required_dirs: + if dir_path.exists(): + print(f"โœ… {description}: {dir_path}") + else: + print(f"โš ๏ธ {description}: {dir_path} (will be created if needed)") + + # Summary + print("\n" + "=" * 50) + if issues: + print(f"โŒ Found {len(issues)} issue(s):") + for issue in issues: + print(f" - {issue}") + print("\nPlease fix these issues before running the TCK workflow.") + return 1 + else: + print("โœ… All checks passed! TCK workflow should work correctly.") + + # Additional recommendations + print("\n๐Ÿ”ง Recommendations:") + print(" - Test locally: ./run-tck.sh") + print(" - Verify .NET version: dotnet --version") + print(" - Check Python version: python --version") + print(" - Review workflow: .github/workflows/tck.yml") + + return 0 + + +if __name__ == "__main__": + sys.exit(main())