diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5579dc1..96adae6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,63 +1,31 @@
-name: CI
+name: Check
 
 on:
-  workflow_dispatch:
-  pull_request:
   push:
     branches:
       - main
       - 'renovate/**'
+  pull_request:
 
 jobs:
-  check:
-    name: Code Quality Checks
+  python:
     runs-on: ubuntu-latest
-
     steps:
-      - name: Clone the code
-        uses: 'actions/checkout@v5'
+      - name: Checkout
+        uses: 'actions/checkout@v6'
 
       - name: Install uv
-        uses: 'astral-sh/setup-uv@v6'
+        uses: 'astral-sh/setup-uv@v7'
         with:
-          version: "0.8.17"
+          version: "0.9.26"
           enable-cache: true
 
-      - name: Setup Python
-        uses: 'actions/setup-python@v6'
-        with:
-          python-version-file: ".python-version"
-
       - name: Install Dependencies
         run: uv sync
 
       - name: Run Checks
         run: uv run poe check
 
-  test:
-    name: Unit Tests
-    runs-on: ubuntu-latest
-    permissions:
-      contents: 'read'
-
-    steps:
-      - name: Clone the code
-        uses: 'actions/checkout@v5'
-
-      - name: Install uv
-        uses: 'astral-sh/setup-uv@v6'
-        with:
-          version: "0.8.17"
-          enable-cache: true
-
-      - name: Setup Python
-        uses: 'actions/setup-python@v6'
-        with:
-          python-version-file: ".python-version"
-
-      - name: Install Dependencies
-        run: uv sync
-
       - name: Run Tests
         run: uv run poe test
 
@@ -83,6 +51,20 @@ jobs:
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
 
+  helm:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: 'actions/checkout@v6'
+
+      - name: Install Helm
+        uses: azure/setup-helm@v4
+        with:
+          version: 'v3.14.0'
+
+      - name: Lint Helm Chart
+        run: helm lint chart/
+
   test-e2e:
     name: E2E Tests
     runs-on: ubuntu-latest
@@ -92,8 +74,8 @@ jobs:
       GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
 
     steps:
-      - name: Clone the code
-        uses: 'actions/checkout@v5'
+      - name: Checkout
+        uses: 'actions/checkout@v6'
 
       - name: Extract image tag
         id: extract-tag
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index c51fc06..2201ba6 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -12,11 +12,10 @@ jobs:
     permissions:
       contents: write
       packages: write
-      id-token: write
 
     steps:
-      - name: 'Checkout'
-        uses: 'actions/checkout@v5'
+      - name: Checkout
+        uses: 'actions/checkout@v6'
         with:
           fetch-depth: 0
 
@@ -50,3 +49,66 @@ jobs:
               --title="${tag#v}" \
               --generate-notes \
               install.yaml
+
+  helm-chart:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout
+        uses: 'actions/checkout@v6'
+        with:
+          fetch-depth: 0
+
+      - name: Install Helm
+        uses: azure/setup-helm@v4
+        with:
+          version: 'v3.14.0'
+
+      - name: Determine Chart Version
+        id: version
+        run: |
+          # Read refs from the runner's GITHUB_* env vars (not expression interpolation) so ref names are never parsed as shell code
+          if [[ "$GITHUB_REF" == refs/tags/v* ]]; then
+            # For tags: use semver (e.g., v1.2.3 → 1.2.3) by stripping the 'v' prefix
+            VERSION="${GITHUB_REF_NAME#v}"
+          else
+            # For branches/PRs: use pre-release version (e.g., 0.0.0-main-abc1234)
+            GIT_BRANCH="$GITHUB_REF_NAME"
+            GIT_SHA="$GITHUB_SHA"
+            # SemVer pre-release identifiers only allow [0-9A-Za-z-]; map anything else to a dash (e.g., feature/foo_bar → feature-foo-bar)
+            GIT_BRANCH="${GIT_BRANCH//[^0-9A-Za-z-]/-}"
+            VERSION="0.0.0-${GIT_BRANCH}-${GIT_SHA:0:7}"
+          fi
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "Chart Version: $VERSION"
+
+      - name: Update Chart Version
+        run: |
+          # Update Chart.yaml with the dynamic version; appVersion stays quoted so YAML always reads it as a string
+          sed -i "s|^version:.*|version: ${{ steps.version.outputs.version }}|" chart/Chart.yaml
+          sed -i "s|^appVersion:.*|appVersion: \"${{ steps.version.outputs.version }}\"|" chart/Chart.yaml
+
+      - name: Update Image Tags for Release
+        run: |
+          # Update image tags in values.yaml to use the versioned images
+          # This assumes Docker images were built with the same git tag
+          sed -i "s|tag: latest|tag: ${{ steps.version.outputs.version }}|g" chart/values.yaml
+
+      - name: Package Helm Chart
+        run: |
+          helm package chart/ --destination .
+
+      - name: Log into Docker Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Push Helm Chart to OCI Registry
+        if: startsWith(github.ref, 'refs/tags/v')
+        run: |
+          helm push *.tgz oci://ghcr.io/${{ github.repository_owner }}/charts
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b85083d..56ac341 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,6 +5,7 @@ repos:
     hooks:
       - id: check-yaml
         args: [--allow-multiple-documents]
+        exclude: ^chart/templates/
       - id: check-ast
       - id: check-case-conflict
       - id: check-json
@@ -17,13 +18,6 @@ repos:
       - id: requirements-txt-fixer
       - id: mixed-line-ending
 
-  - repo: https://github.com/astral-sh/uv-pre-commit
-    # uv version.
-    rev: 0.9.11
-    hooks:
-      # Update the uv lockfile
-      - id: uv-lock
-
   - repo: local
     hooks:
       - id: python-lint
@@ -32,9 +26,17 @@ repos:
         entry: "uv run poe lint"
         types_or: [ python ]
         pass_filenames: false
+
       - id: python-ruff
         name: python-ruff
         language: system
         entry: "uv run poe ruff"
         types_or: [ python ]
         pass_filenames: false
+
+      - id: helm-lint
+        name: Helm Lint
+        entry: helm lint chart/
+        language: system
+        pass_filenames: false
+        files: ^chart/
diff --git a/Tiltfile b/Tiltfile
index d67eba0..990a212 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -38,11 +38,22 @@ helm_resource(
     'testkube',
     'oci://docker.io/kubeshop/testkube',
     namespace='testkube',
-    flags=['--version=2.5.3', '--create-namespace', '--values=deploy/local/testkube/values.yaml', '--wait',
+    flags=['--version=2.5.3', '--values=deploy/local/testkube/values.yaml', '--wait',
     '--wait-for-jobs', '--timeout=10m'],
 )
 
-# Apply Kubernetes manifests
+# Deploy testbench Helm chart
+k8s_yaml(helm(
+    'chart',
+    name='testbench',
+    namespace='testkube',
+    values=['chart/values.yaml'],
+    set=[
+        'image.repository=testworkflows',
+    ],
+))
+
+# Apply local development manifests
 k8s_yaml(kustomize('deploy/local'))
 
 k8s_resource('ai-gateway-litellm', port_forwards=['11001:4000'])
diff --git a/chart/.helmignore b/chart/.helmignore
new file mode 100644
index 0000000..0e8a0eb
--- /dev/null
+++ b/chart/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/chart/Chart.yaml b/chart/Chart.yaml
new file mode 100644
index 0000000..5d62f56
--- /dev/null
+++ b/chart/Chart.yaml
@@ -0,0 +1,18 @@
+apiVersion: v2
+name: testbench
+description: A Helm chart for RAGAS-based agent evaluation via Testkube workflows
+type: application
+version: 0.1.0
+appVersion: "0.1.0"
+keywords:
+  - ragas
+  - testkube
+  - evaluation
+  - testing
+  - a2a
+maintainers:
+  - name: Agentic Layer Team
+    url: https://docs.agentic-layer.ai/
+home: https://github.com/agentic-layer/testbench
+sources:
+  - https://github.com/agentic-layer/testbench
diff --git a/chart/dashboards/evaluation-dashboard.json b/chart/dashboards/evaluation-dashboard.json
new file mode 100644
index 0000000..46b95a7
--- /dev/null
+++ b/chart/dashboards/evaluation-dashboard.json
@@ -0,0 +1,1033 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineStyle": {
+              "fill": "solid"
+            },
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "showValues": false,
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 15,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "editorMode": "builder",
+          "expr": "max by(type) (avg by(execution_number, type) (testbench_evaluation_token_usage{workflow_name=\"$workflow\"}))",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Token Usage Over Time",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 1,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "decimals": 2,
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 8
+      },
+      "id": 16,
+      "options": {
+        "barRadius": 0,
+        "barWidth": 0.97,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "orientation": "auto",
+        "showValue": "auto",
+        "stacking": "none",
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "editorMode": "builder",
+          "expr": "avg by(execution_number) (testbench_evaluation_cost{workflow_name=\"$workflow\"})",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Cost Per Test (USD)",
+      "transformations": [
+        {
+          "id": "labelsToFields",
+          "options": {}
+        },
+        {
+          "id": "groupBy",
+          "options": {
+            "fields": {
+              "Value": {
+                "aggregations": [
+                  "lastNotNull"
+                ],
+                "operation": "aggregate"
+              },
+              "execution_number": {
+                "aggregations": [],
+                "operation": "groupby"
+              }
+            }
+          }
+        },
+        {
+          "id": "merge",
+          "options": {}
+        }
+      ],
+      "type": "barchart"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "series",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 1,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "fieldMinMax": false,
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "__systemRef": "hideSeriesFrom",
+            "matcher": {
+              "id": "byNames",
+              "options": {
+                "mode": "exclude",
+                "names": [
+                  "Value (lastNotNull) input_tokens"
+                ],
+                "prefix": "All except:",
+                "readOnly": true
+              }
+            },
+            "properties": [
+              {
+                "id": "custom.hideFrom",
+                "value": {
+                  "legend": false,
+                  "tooltip": true,
+                  "viz": true
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "id": 18,
+      "options": {
+        "barRadius": 0,
+        "barWidth": 0.97,
+        "colorByField": "execution_number",
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "orientation": "auto",
+        "showValue": "auto",
+        "stacking": "none",
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "editorMode": "builder",
+          "expr": "avg by(execution_number, type) (testbench_evaluation_token_usage{workflow_name=\"$workflow\", type=\"input_tokens\"})",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "editorMode": "builder",
+          "expr": "avg by(execution_number, type) (testbench_evaluation_token_usage{workflow_name=\"$workflow\", type=\"output_tokens\"})",
+          "hide": false,
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Tokens per Test",
+      "transformations": [
+        {
+          "id": "labelsToFields",
+          "options": {}
+        },
+        {
+          "id": "merge",
+          "options": {}
+        },
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "execution_number"
+              }
+            ]
+          }
+        },
+        {
+          "id": "groupBy",
+          "options": {
+            "fields": {
+              "Value": {
+                "aggregations": [
+                  "lastNotNull"
+                ],
+                "operation": "aggregate"
+              },
+              "execution_number": {
+                "aggregations": [],
+                "operation": "groupby"
+              },
+              "type": {
+                "aggregations": [],
+                "operation": "groupby"
+              }
+            }
+          }
+        },
+        {
+          "id": "groupingToMatrix",
+          "options": {
+            "columnField": "type",
+            "rowField": "execution_number",
+            "valueField": "Value (lastNotNull)"
+          }
+        }
+      ],
+      "type": "barchart"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 24
+      },
+      "id": 4,
+      "panels": [],
+      "repeat": "metric",
+      "title": "Evaluation of $metric",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "showValues": false,
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 25
+      },
+      "id": 11,
+      "options": {
+        "legend": {
+          "calcs": [
+            "last"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": false,
+          "sortBy": "Name",
+          "sortDesc": false
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "avg by(name) (last_over_time(testbench_evaluation_metric{name=\"$metric\", workflow_name=\"$workflow\"}[$__interval]))",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "Average",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Average $metric over time",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 50,
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "pointShape": "square",
+            "pointSize": {
+              "fixed": 7
+            },
+            "pointStrokeWidth": 1,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "show": "points+lines"
+          },
+          "fieldMinMax": false,
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 25
+      },
+      "id": 14,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "mapping": "auto",
+        "series": [
+          {
+            "frame": {
+              "matcher": {
+                "id": "byIndex",
+                "options": 0
+              }
+            },
+            "x": {
+              "matcher": {
+                "id": "byName",
+                "options": "execution_number"
+              }
+            },
+            "y": {
+              "matcher": {
+                "id": "byName",
+                "options": "Value"
+              }
+            }
+          }
+        ],
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "editorMode": "builder",
+          "expr": "avg by(execution_number) (testbench_evaluation_metric{workflow_name=\"$workflow\", name=~\"$metric\"})",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "Average",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Average $metric per test",
+      "transformations": [
+        {
+          "id": "labelsToFields",
+          "options": {
+            "mode": "columns"
+          }
+        },
+        {
+          "id": "convertFieldType",
+          "options": {
+            "conversions": [
+              {
+                "destinationType": "number",
+                "enumConfig": {
+                  "text": [
+                    "62"
+                  ]
+                },
+                "targetField": "execution_number"
+              }
+            ],
+            "fields": {}
+          }
+        },
+        {
+          "id": "merge",
+          "options": {}
+        }
+      ],
+      "type": "xychart"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "showValues": false,
+            "spanNulls": true,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 33
+      },
+      "id": 8,
+      "options": {
+        "legend": {
+          "calcs": [
+            "last"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true,
+          "sortBy": "Name",
+          "sortDesc": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "editorMode": "code",
+          "expr": "max by(sample_hash, user_input_truncated) (last_over_time(testbench_evaluation_metric{name=\"$metric\", workflow_name=\"$workflow\"}[$__interval]))",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "Question: \"{{user_input_truncated}}\"",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Individual $metric results over time",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 50,
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "pointShape": "square",
+            "pointSize": {
+              "fixed": 7
+            },
+            "pointStrokeWidth": 1,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "show": "points+lines"
+          },
+          "fieldMinMax": false,
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 33
+      },
+      "id": 13,
+      "options": {
+        "legend": {
+          "calcs": [
+            "last"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true,
+          "sortBy": "Name",
+          "sortDesc": true
+        },
+        "mapping": "auto",
+        "series": [
+          {
+            "x": {
+              "matcher": {
+                "id": "byName",
+                "options": "execution_number (lastNotNull)"
+              }
+            },
+            "y": {
+              "matcher": {
+                "id": "byName",
+                "options": "Value (lastNotNull)"
+              }
+            }
+          }
+        ],
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "editorMode": "builder",
+          "expr": "avg by(sample_hash, trace_id, user_input_truncated, execution_number) (testbench_evaluation_metric{workflow_name=\"$workflow\", name=~\"$metric\"})",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "Question: \"{{user_input_truncated}}\"",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Individual $metric results per test",
+      "transformations": [
+        {
+          "id": "labelsToFields",
+          "options": {
+            "mode": "columns"
+          }
+        },
+        {
+          "id": "convertFieldType",
+          "options": {
+            "conversions": [
+              {
+                "destinationType": "number",
+                "targetField": "execution_number"
+              }
+            ],
+            "fields": {}
+          }
+        },
+        {
+          "id": "groupBy",
+          "options": {
+            "fields": {
+              "Time": {
+                "aggregations": [
+                  "last"
+                ],
+                "operation": "aggregate"
+              },
+              "Value": {
+                "aggregations": [
+                  "lastNotNull"
+                ],
+                "operation": "aggregate"
+              },
+              "execution_number": {
+                "aggregations": [
+                  "lastNotNull"
+                ],
+                "operation": "aggregate"
+              },
+              "sample_hash": {
+                "aggregations": []
+              },
+              "trace_id": {
+                "aggregations": [
+                  "lastNotNull"
+                ],
+                "operation": "aggregate"
+              },
+              "user_input_truncated": {
+                "aggregations": [],
+                "operation": "groupby"
+              }
+            }
+          }
+        },
+        {
+          "id": "merge",
+          "options": {}
+        },
+        {
+          "id": "seriesToRows",
+          "options": {}
+        },
+        {
+          "id": "partitionByValues",
+          "options": {
+            "fields": [
+              "user_input_truncated"
+            ],
+            "keepFields": false,
+            "naming": {
+              "asLabels": false
+            }
+          }
+        },
+        {
+          "id": "renameByRegex",
+          "options": {
+            "regex": "(.*?) Value \\(lastNotNull\\)",
+            "renamePattern": "$1"
+          }
+        }
+      ],
+      "type": "xychart"
+    }
+  ],
+  "preload": false,
+  "schemaVersion": 42,
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "text": "multi-turn-workflow",
+          "value": "multi-turn-workflow"
+        },
+        "definition": "label_values(workflow_name)",
+        "description": "name of the TestWorkflow CR",
+        "label": "Workflow Name",
+        "name": "workflow",
+        "options": [],
+        "query": {
+          "qryType": 1,
+          "query": "label_values(workflow_name)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
+        },
+        "refresh": 1,
+        "regex": "",
+        "sort": 5,
+        "type": "query"
+      },
+      {
+        "allowCustomValue": false,
+        "current": {
+          "text": "All",
+          "value": "$__all"
+        },
+        "definition": "label_values(testbench_evaluation_metric,name)",
+        "includeAll": true,
+        "name": "metric",
+        "options": [],
+        "query": {
+          "qryType": 1,
+          "query": "label_values(testbench_evaluation_metric,name)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
+        },
+        "refresh": 1,
+        "regex": "",
+        "type": "query"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-15m",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Workflow Evaluations",
+  "uid": "g9lx76",
+  "version": 1
+}
diff --git a/chart/dashboards/testkube-dashboard.json b/chart/dashboards/testkube-dashboard.json
new file mode 100644
index 0000000..85cf3fd
--- /dev/null
+++ b/chart/dashboards/testkube-dashboard.json
@@ -0,0 +1,316 @@
+{
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "8.1.2"
+    },
+    {
+      "type": "datasource",
+      "id": "prometheus",
+      "name": "Prometheus",
+      "version": "1.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "stat",
+      "name": "Stat",
+      "version": ""
+    },
+    {
+      "type": "panel",
+      "id": "timeseries",
+      "name": "Time series",
+      "version": ""
+    }
+  ],
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "gnetId": null,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {"type": "prometheus"},
+      "description": "Executions distributions",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": -1,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "opacity",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 1,
+            "pointSize": 3,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "always",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 9,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "exemplar": true,
+          "expr": "testkube_testworkflow_executions_count_total{result=\"passed\"}",
+          "hide": false,
+          "instant": false,
+          "interval": "1",
+          "legendFormat": "{{name}}",
+          "refId": "A"
+        },
+        {
+          "hide": false,
+          "refId": "B"
+        }
+      ],
+      "title": "SUCCESSFUL workflow executions",
+      "transparent": true,
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus"},
+      "description": "Executions distributions",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": -1,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "opacity",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 1,
+            "pointSize": 3,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "always",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 9,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 4,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "exemplar": true,
+          "expr": "testkube_testworkflow_executions_count_total{result=\"failed\"}",
+          "hide": false,
+          "instant": false,
+          "interval": "1",
+          "legendFormat": "{{name}}",
+          "refId": "A"
+        },
+        {
+          "hide": false,
+          "refId": "B"
+        }
+      ],
+      "title": "FAILED workflow executions",
+      "transparent": true,
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus"},
+      "description": "Executions distributions",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 9,
+        "w": 12,
+        "x": 12,
+        "y": 9
+      },
+      "id": 3,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "text": {},
+        "textMode": "auto"
+      },
+      "pluginVersion": "8.1.2",
+      "targets": [
+        {
+          "exemplar": true,
+          "expr": "testkube_testworkflow_executions_count_total{}",
+          "format": "table",
+          "hide": false,
+          "instant": false,
+          "interval": "1",
+          "intervalFactor": 10,
+          "legendFormat": "{{type}} created",
+          "refId": "A"
+        },
+        {
+          "hide": false,
+          "refId": "B"
+        }
+      ],
+      "title": "Total workflow executions",
+      "transparent": true,
+      "type": "stat"
+    }
+  ],
+  "schemaVersion": 30,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "Testkube",
+  "uid": "jMtrP3nnz",
+  "version": 2
+}
diff --git a/chart/templates/NOTES.txt b/chart/templates/NOTES.txt
new file mode 100644
index 0000000..fc8e018
--- /dev/null
+++ b/chart/templates/NOTES.txt
@@ -0,0 +1,31 @@
+Thank you for installing {{ .Chart.Name }}!
+
+Your release is named: {{ .Release.Name }}
+Installed in namespace: {{ include "testbench.namespace" . }}
+
+TestWorkflow Templates have been created:
+  - ragas-setup-template
+  - ragas-run-template
+  - ragas-evaluate-template
+  - ragas-publish-template
+  - ragas-visualize-template
+
+To view the templates:
+  kubectl get testworkflowtemplates -n {{ include "testbench.namespace" . }}
+
+To run a complete evaluation workflow:
+  kubectl testkube run testworkflow <your-workflow-name> \
+    --config bucket="your-bucket" \
+    --config key="your-dataset.csv" \
+    --config agentUrl="http://agent-service:8000" \
+    --config model="gemini-2.5-flash-lite" \
+    -n {{ include "testbench.namespace" . }}
+
+To watch workflow execution:
+  kubectl testkube watch testworkflow <your-workflow-name> -n {{ include "testbench.namespace" . }}
+
+Grafana dashboards have been deployed to: {{ .Values.grafana.dashboardNamespace }}
+  ConfigMap: {{ .Values.grafana.dashboardConfigMapName }}
+{{ with .Chart.Home }}{{/* home is optional in Chart.yaml; skip this section when it is unset */}}
+For more information, visit:
+  {{ . }}{{ end }}
diff --git a/chart/templates/_helpers.tpl b/chart/templates/_helpers.tpl
new file mode 100644
index 0000000..b0554ce
--- /dev/null
+++ b/chart/templates/_helpers.tpl
@@ -0,0 +1,72 @@
+{{/*
+Chart name, or .Values.nameOverride when set, capped at 63 characters with any trailing "-" removed.
+*/}}
+{{- define "testbench.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name: fullnameOverride if set; otherwise the release name, joined with the chart name unless it already contains it. Always capped at 63 characters with any trailing "-" removed.
+*/}}
+{{- define "testbench.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- trunc 63 .Values.fullnameOverride | trimSuffix "-" }}
+{{- else }}
+{{- $chartName := .Values.nameOverride | default .Chart.Name }}
+{{- if contains $chartName .Release.Name }}
+{{- trunc 63 .Release.Name | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $chartName | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Value of the helm.sh/chart label: "<chart name>-<chart version>" with "+" replaced by "_", capped at 63 characters with any trailing "-" removed.
+*/}}
+{{- define "testbench.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: helm.sh/chart, the selector labels, app.kubernetes.io/version (only when the chart sets an appVersion) and app.kubernetes.io/managed-by.
+*/}}
+{{- define "testbench.labels" -}}
+helm.sh/chart: {{ include "testbench.chart" . }}
+{{ include "testbench.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: app.kubernetes.io/name (from the "testbench.name" helper) and app.kubernetes.io/instance (the release name).
+*/}}
+{{- define "testbench.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "testbench.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Static Testkube workflow labels (no values are read): a test-category label and an app label.
+*/}}
+{{- define "testbench.workflowLabels" -}}
+testkube.io/test-category: ragas-evaluation
+app: testworkflows
+{{- end }}
+
+{{/*
+Namespace name, emitted verbatim from .Values.namespace.name (there is no fallback to the release namespace).
+*/}}
+{{- define "testbench.namespace" -}}
+{{- .Values.namespace.name }}
+{{- end }}
+
+{{/*
+Image reference "<repository>:<tag>"; the tag falls back to the chart appVersion when .Values.image.tag is empty.
+*/}}
+{{- define "testbench.image" -}}
+{{- $imageTag := default .Chart.AppVersion .Values.image.tag }}
+{{- printf "%s:%s" .Values.image.repository $imageTag }}
+{{- end }}
diff --git a/chart/templates/evaluate-template.yaml b/chart/templates/evaluate-template.yaml
new file mode 100644
index 0000000..4ff1f57
--- /dev/null
+++ b/chart/templates/evaluate-template.yaml
@@ -0,0 +1,42 @@
+apiVersion: testworkflows.testkube.io/v1
+kind: TestWorkflowTemplate
+metadata:
+  name: ragas-evaluate-template
+  namespace: {{ include "testbench.namespace" . | quote }}  # quoted so digit-only or boolean-like names stay strings
+  labels:
+    {{- include "testbench.labels" . | nindent 4 }}
+    {{- include "testbench.workflowLabels" . | nindent 4 }}
+spec:
+  # Parameters that can be overridden per run; model has no default, openApiBasePath does
+  config:
+    model:
+      type: string
+      description: "Model name to use for evaluation (e.g., gemini-2.5-flash-lite)"
+    openApiBasePath:
+      type: string
+      description: "Base path for OpenAI API"
+      default: "http://ai-gateway-litellm.ai-gateway:4000"
+
+  # Single step: run evaluate.py, then fail unless the scores file was written
+  steps:
+    - name: evaluate-results
+      artifacts:
+        paths:
+          - "data/results/evaluation_scores.json"
+      run:
+        command:
+          - sh
+          - -c
+        args:  # the backtick-wrapped expressions are emitted literally by Helm; presumably Testkube resolves them at run time
+          - |
+            uv run python3 evaluate.py "{{`{{ config.model }}`}}" --metrics-config "/app/config/metrics.yaml" && \
+            if [ -f data/results/evaluation_scores.json ]; then
+              echo "✓ Evaluation completed"
+              cat data/results/evaluation_scores.json
+            else
+              echo "✗ Error: Results file not created"
+              exit 1
+            fi
+        env:
+          - name: OPENAI_API_BASE
+            value: "{{`{{ config.openApiBasePath }}`}}"
diff --git a/chart/templates/grafana-dashboards.yaml b/chart/templates/grafana-dashboards.yaml
new file mode 100644
index 0000000..f4fb221
--- /dev/null
+++ b/chart/templates/grafana-dashboards.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: ConfigMap  # holds both bundled dashboard JSON files, read from the chart's dashboards/ directory
+metadata:
+  name: {{ .Values.grafana.dashboardConfigMapName | quote }}  # quoted so digit-only or boolean-like values stay strings
+  namespace: {{ .Values.grafana.dashboardNamespace | quote }}
+  labels:
+    {{- include "testbench.labels" . | nindent 4 }}
+    grafana_dashboard: "1"  # NOTE(review): presumably the label a Grafana dashboard sidecar selects on; confirm
+data:
+  evaluation-dashboard.json: |
+{{ .Files.Get "dashboards/evaluation-dashboard.json" | indent 4 }}
+  testkube-dashboard.json: |
+{{ .Files.Get "dashboards/testkube-dashboard.json" | indent 4 }}
diff --git a/chart/templates/namespace.yaml b/chart/templates/namespace.yaml
new file mode 100644
index 0000000..a39e415
--- /dev/null
+++ b/chart/templates/namespace.yaml
@@ -0,0 +1,8 @@
+{{- if .Values.namespace.create }}
+apiVersion: v1
+kind: Namespace  # rendered only when namespace.create is true
+metadata:
+  name: {{ include "testbench.namespace" . | quote }}  # quoted so digit-only or boolean-like names stay strings
+  labels:
+    {{- include "testbench.labels" . | nindent 4 }}
+{{- end }}
diff --git a/chart/templates/publish-template.yaml b/chart/templates/publish-template.yaml
new file mode 100644
index 0000000..40b6a26
--- /dev/null
+++ b/chart/templates/publish-template.yaml
@@ -0,0 +1,29 @@
+apiVersion: testworkflows.testkube.io/v1
+kind: TestWorkflowTemplate
+metadata:
+  name: ragas-publish-template
+  namespace: {{ include "testbench.namespace" . | quote }}  # quoted so digit-only or boolean-like names stay strings
+  labels:
+    {{- include "testbench.labels" . | nindent 4 }}
+    {{- include "testbench.workflowLabels" . | nindent 4 }}
+
+spec:
+  # Configuration parameters that can be overridden
+  config:
+    otelExporterOtlpEndpoint:
+      type: string
+      description: "OTel Collector endpoint for exporting traces and metrics"
+      default: "http://lgtm.monitoring:4318"
+
+  # Single step: publish.py is passed the workflow name, execution id and execution number
+  steps:
+    - name: publish-metrics
+      run:
+        args:  # the backtick-wrapped expressions are emitted literally by Helm; presumably Testkube resolves them at run time
+          - publish.py
+          - "{{`{{ workflow.name }}`}}"
+          - "{{`{{ execution.id }}`}}"
+          - "{{`{{ execution.number }}`}}"
+        env:
+          - name: OTEL_EXPORTER_OTLP_ENDPOINT
+            value: "{{`{{ config.otelExporterOtlpEndpoint }}`}}"
diff --git a/chart/templates/run-template.yaml b/chart/templates/run-template.yaml
new file mode 100644
index 0000000..728fea3
--- /dev/null
+++ b/chart/templates/run-template.yaml
@@ -0,0 +1,33 @@
+apiVersion: testworkflows.testkube.io/v1
+kind: TestWorkflowTemplate
+metadata:
+  name: ragas-run-template
+  namespace: {{ include "testbench.namespace" . | quote }}  # quoted so digit-only or boolean-like names stay strings
+  labels:
+    {{- include "testbench.labels" . | nindent 4 }}
+    {{- include "testbench.workflowLabels" . | nindent 4 }}
+
+spec:
+  # Parameters that can be overridden per run; agentUrl has no default, the OTLP endpoint does
+  config:
+    agentUrl:
+      type: string
+      description: "URL to the agent endpoint (A2A protocol)"
+    otelExporterOtlpEndpoint:
+      type: string
+      description: "OTel Collector endpoint for exporting traces and metrics"
+      default: "http://lgtm.monitoring:4318"
+
+  # Single step: run.py is passed the agent URL; the experiment JSONL is collected as an artifact
+  steps:
+    - name: run
+      artifacts:
+        paths:
+          - "data/experiments/ragas_experiment.jsonl"
+      run:
+        args:  # the backtick-wrapped expressions are emitted literally by Helm; presumably Testkube resolves them at run time
+          - run.py
+          - "{{`{{ config.agentUrl }}`}}"
+        env:
+          - name: OTEL_EXPORTER_OTLP_ENDPOINT
+            value: "{{`{{ config.otelExporterOtlpEndpoint }}`}}"
diff --git a/chart/templates/setup-template.yaml b/chart/templates/setup-template.yaml
new file mode 100644
index 0000000..6af624b
--- /dev/null
+++ b/chart/templates/setup-template.yaml
@@ -0,0 +1,30 @@
+apiVersion: testworkflows.testkube.io/v1
+kind: TestWorkflowTemplate
+metadata:
+  name: ragas-setup-template
+  namespace: {{ include "testbench.namespace" . | quote }}  # quoted so digit-only or boolean-like names stay strings
+  labels:
+    {{- include "testbench.labels" . | nindent 4 }}
+    {{- include "testbench.workflowLabels" . | nindent 4 }}
+
+spec:
+  # Parameters supplied per run; neither bucket nor key has a default
+  config:
+    bucket:
+      type: string
+      description: "S3/MinIO bucket name containing the dataset"
+    key:
+      type: string
+      description: "S3/MinIO object key (path to dataset file in .csv / .json / .parquet format)"
+
+  # Single step: setup.py is passed the bucket and key; the dataset JSONL is collected as an artifact
+  steps:
+    - name: setup
+      artifacts:
+        paths:
+          - "data/datasets/ragas_dataset.jsonl"
+      run:
+        args:  # the backtick-wrapped expressions are emitted literally by Helm; presumably Testkube resolves them at run time
+          - setup.py
+          - "{{`{{ config.bucket }}`}}"
+          - "{{`{{ config.key }}`}}"
diff --git a/chart/templates/visualize-template.yaml b/chart/templates/visualize-template.yaml
new file mode 100644
index 0000000..0b0140a
--- /dev/null
+++ b/chart/templates/visualize-template.yaml
@@ -0,0 +1,22 @@
+apiVersion: testworkflows.testkube.io/v1
+kind: TestWorkflowTemplate
+metadata:
+  name: ragas-visualize-template
+  namespace: {{ include "testbench.namespace" . | quote }}  # quoted so digit-only or boolean-like names stay strings
+  labels:
+    {{- include "testbench.labels" . | nindent 4 }}
+    {{- include "testbench.workflowLabels" . | nindent 4 }}
+
+spec:
+  # Single step (no config parameters): visualize.py is passed the workflow name, execution id and execution number
+  steps:
+    - name: visualize-metrics
+      artifacts:
+        paths:
+          - "data/results/evaluation_report.html"
+      run:
+        args:  # the backtick-wrapped expressions are emitted literally by Helm; presumably Testkube resolves them at run time
+          - visualize.py
+          - "{{`{{ workflow.name }}`}}"
+          - "{{`{{ execution.id }}`}}"
+          - "{{`{{ execution.number }}`}}"
diff --git a/chart/values.yaml b/chart/values.yaml
new file mode 100644
index 0000000..2f97780
--- /dev/null
+++ b/chart/values.yaml
@@ -0,0 +1,20 @@
+# Default values for testbench
+
+namespace:
+  # Whether the chart renders a Namespace resource for `name` (the template makes no existence check)
+  create: true
+  # Namespace of the TestWorkflowTemplate resources, and of the Namespace resource when `create` is true
+  name: testkube
+
+image:
+  repository: ghcr.io/agentic-layer/testbench/testworkflows
+  # Overrides the image tag whose default is the chart appVersion
+  tag: ""
+  pullPolicy: IfNotPresent  # NOTE(review): not referenced by the templates visible in this change; confirm it is consumed
+
+# Grafana dashboard configuration
+grafana:
+  # Name of the ConfigMap that holds the dashboard JSON files
+  dashboardConfigMapName: grafana-testkube-dashboard
+  # Namespace the dashboard ConfigMap is created in
+  dashboardNamespace: monitoring
diff --git a/deploy/local/kustomization.yaml b/deploy/local/kustomization.yaml
index f58bdc0..a83a2bc 100644
--- a/deploy/local/kustomization.yaml
+++ b/deploy/local/kustomization.yaml
@@ -4,7 +4,6 @@ resources:
   - lgtm/
   - weather-agent.yaml
   - dataset.yaml
-  - ../base
   - multi-turn-metrics-configmap.yaml
   - multi-turn-workflow.yaml
-  - multi-turn-workflow-trigger.yaml
\ No newline at end of file
+  - multi-turn-workflow-trigger.yaml