From 9e59779bf017a6a5ccaf551c601a00bbee42ef5e Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Tue, 30 Dec 2025 16:01:34 +0100 Subject: [PATCH 1/7] refactor: replace icon imports with specific icon components in EvaluationRunsCreateButton and InfiniteVirtualTableFeatureShell --- .../components/EvaluationRunsCreateButton.tsx | 122 +++++++++--------- .../InfiniteVirtualTableFeatureShell.tsx | 6 +- 2 files changed, 62 insertions(+), 66 deletions(-) diff --git a/web/oss/src/components/EvaluationRunsTablePOC/components/EvaluationRunsCreateButton.tsx b/web/oss/src/components/EvaluationRunsTablePOC/components/EvaluationRunsCreateButton.tsx index ae14c4506..f6264e0a3 100644 --- a/web/oss/src/components/EvaluationRunsTablePOC/components/EvaluationRunsCreateButton.tsx +++ b/web/oss/src/components/EvaluationRunsTablePOC/components/EvaluationRunsCreateButton.tsx @@ -1,7 +1,7 @@ import {useCallback, useEffect, useMemo} from "react" -import {CaretDown, Check, Plus} from "@phosphor-icons/react" -import {Button, Dropdown, Space, Tooltip, type MenuProps} from "antd" +import {PlusIcon} from "@phosphor-icons/react" +import {Button, Dropdown, Tooltip, type MenuProps} from "antd" import {useAtom, useAtomValue} from "jotai" import { @@ -37,13 +37,17 @@ const createTypeCopy: Record< }, } -const isSupportedCreateType = (value: string): value is SupportedCreateType => - SUPPORTED_CREATE_TYPES.includes(value as SupportedCreateType) +const isSupportedCreateType = (value: unknown): value is SupportedCreateType => { + return typeof value === "string" && (SUPPORTED_CREATE_TYPES as string[]).includes(value) +} + +const FALLBACK_CREATE_TYPE: SupportedCreateType = "auto" const EvaluationRunsCreateButton = () => { const {createEnabled, createTooltip, evaluationKind, defaultCreateType, scope} = useAtomValue( evaluationRunsTableHeaderStateAtom, ) + const isAllTab = evaluationKind === "all" const isAppScoped = scope === "app" const [createOpen, setCreateOpen] = useAtom(evaluationRunsCreateModalOpenAtom) const [selectedCreateType, setSelectedCreateType] = useAtom( @@ -52,40 +56,50 @@ const EvaluationRunsCreateButton = () => { const [createTypePreference, setCreateTypePreference] = useAtom( evaluationRunsCreateTypePreferenceAtom, ) - const isAllTab = evaluationKind === "all" + + const availableTypes = useMemo(() => { + if (!isAllTab) return [] + if (isAppScoped) return SUPPORTED_CREATE_TYPES.filter((t) => t !== "online") + return SUPPORTED_CREATE_TYPES + }, [isAllTab, isAppScoped]) + + const normalizeAllTabType = useCallback( + (value: unknown): SupportedCreateType => { + const candidate = isSupportedCreateType(value) ? value : FALLBACK_CREATE_TYPE + return availableTypes.includes(candidate) + ? candidate + : (availableTypes[0] ?? FALLBACK_CREATE_TYPE) + }, + [availableTypes], + ) useEffect(() => { - if (!createEnabled && createOpen) { - setCreateOpen(false) - } + if (!createEnabled && createOpen) setCreateOpen(false) }, [createEnabled, createOpen, setCreateOpen]) useEffect(() => { - if (!isAllTab && defaultCreateType && selectedCreateType !== defaultCreateType) { - setSelectedCreateType(defaultCreateType) - } + if (isAllTab) return + if (!defaultCreateType) return + if (selectedCreateType !== defaultCreateType) setSelectedCreateType(defaultCreateType) }, [defaultCreateType, isAllTab, selectedCreateType, setSelectedCreateType]) useEffect(() => { if (!isAllTab) return - const normalizedPreference = isSupportedCreateType(createTypePreference) - ? createTypePreference - : "auto" - if (!isSupportedCreateType(createTypePreference)) { - setCreateTypePreference(normalizedPreference) - } - if (selectedCreateType !== normalizedPreference) { - setSelectedCreateType(normalizedPreference) - } + + const normalized = normalizeAllTabType(createTypePreference) + + if (createTypePreference !== normalized) setCreateTypePreference(normalized) + if (selectedCreateType !== normalized) setSelectedCreateType(normalized) }, [ - createTypePreference, isAllTab, + createTypePreference, selectedCreateType, setCreateTypePreference, setSelectedCreateType, + normalizeAllTabType, ]) - const handlePrimaryClick = useCallback(() => { + const openCreateModal = useCallback(() => { if (!createEnabled) return setCreateOpen(true) }, [createEnabled, setCreateOpen]) @@ -93,74 +107,56 @@ const EvaluationRunsCreateButton = () => { const handleMenuClick = useCallback>( ({key}) => { if (!isSupportedCreateType(key)) return - setSelectedCreateType(key) - setCreateTypePreference(key) - if (!createEnabled) return - setCreateOpen(true) + + const normalized = normalizeAllTabType(key) + + setSelectedCreateType(normalized) + setCreateTypePreference(normalized) + openCreateModal() }, - [createEnabled, setCreateOpen, setCreateTypePreference, setSelectedCreateType], + [normalizeAllTabType, openCreateModal, setCreateTypePreference, setSelectedCreateType], ) - const dropdownMenuItems = useMemo(() => { + const menuItems = useMemo(() => { if (!isAllTab) return [] - // Filter out "online" (Live Evaluation) in app-scoped views - const availableTypes = isAppScoped - ? SUPPORTED_CREATE_TYPES.filter((type) => type !== "online") - : SUPPORTED_CREATE_TYPES + return availableTypes.map((type) => { const copy = createTypeCopy[type] - const isActive = selectedCreateType === type return { key: type, label: ( -
-
- {isActive ? : null} -
-
- {copy.title} - {copy.description} -
+
+ {copy.title} + {copy.description}
), } }) - }, [isAllTab, isAppScoped, selectedCreateType]) - - const buttonLabel = useMemo(() => { - if (!isAllTab) return "New Evaluation" - const shortLabel = isSupportedCreateType(selectedCreateType) - ? createTypeCopy[selectedCreateType]?.short - : null - return shortLabel ? `New ${shortLabel} Evaluation` : "New Evaluation" - }, [isAllTab, selectedCreateType]) + }, [availableTypes, isAllTab]) return (
{isAllTab ? ( - + - - diff --git a/web/oss/src/components/InfiniteVirtualTable/features/InfiniteVirtualTableFeatureShell.tsx b/web/oss/src/components/InfiniteVirtualTable/features/InfiniteVirtualTableFeatureShell.tsx index 8d7934de0..ccf275af3 100644 --- a/web/oss/src/components/InfiniteVirtualTable/features/InfiniteVirtualTableFeatureShell.tsx +++ b/web/oss/src/components/InfiniteVirtualTable/features/InfiniteVirtualTableFeatureShell.tsx @@ -1,7 +1,7 @@ import type {CSSProperties, Key, ReactNode} from "react" import {useCallback, useEffect, useMemo, useState} from "react" -import {Trash} from "@phosphor-icons/react" +import {TrashIcon} from "@phosphor-icons/react" import {Button, Grid, Tabs, Tooltip} from "antd" import type {MenuProps} from "antd" import clsx from "clsx" @@ -358,7 +358,7 @@ function InfiniteVirtualTableFeatureShellBase( ) From 23b05c02434ad72585f3f6b538cc59f0416bb93f Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 31 Dec 2025 15:15:28 +0100 Subject: [PATCH 2/7] refactor: improve layout and styling in evaluation result components --- .../components/CompareRunsMenu.tsx | 46 ++++++++----------- .../EvalRunDetails/components/Page.tsx | 5 +- .../components/views/OverviewView.tsx | 4 +- .../components/BaseRunMetricsSection.tsx | 15 ++---- .../src/components/PageLayout/PageLayout.tsx | 4 +- web/oss/src/styles/evaluations.css | 6 ++- 6 files changed, 34 insertions(+), 46 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx b/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx index 21759f163..2c1d2ed89 100644 --- a/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx +++ b/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx @@ -259,7 +259,7 @@ const CompareRunsPopoverContent = memo(({runId, availability}: CompareRunsPopove return ( -
+
{availability.testsetIds.length ? ( @@ -280,31 +280,21 @@ const CompareRunsPopoverContent = memo(({runId, availability}: CompareRunsPopove ) : null} -
- -
- - Selected {compareIds.length}/{MAX_COMPARISON_RUNS} - -
- {selectedDetails.map((run) => ( - { - event.preventDefault() - handleRemove(run.id) - }} + + + Selected {compareIds.length}/{MAX_COMPARISON_RUNS} + + {compareIds.length ? ( +
- {compareIds.length ? ( - - ) : null} + Clear all + + ) : null} +
{item.status ? : null} {createdLabel ? ( @@ -457,8 +447,8 @@ const TestsetReferenceTag = ({ label={label} copyValue={copyValue} href={href} - tone="testset" className="max-w-[200px]" + showIcon={false} /> ) diff --git a/web/oss/src/components/EvalRunDetails/components/Page.tsx b/web/oss/src/components/EvalRunDetails/components/Page.tsx index 2693c8cb9..4704e2b56 100644 --- a/web/oss/src/components/EvalRunDetails/components/Page.tsx +++ b/web/oss/src/components/EvalRunDetails/components/Page.tsx @@ -130,6 +130,7 @@ const EvalRunPreviewPage = ({runId, evaluationType, projectId = null}: EvalRunPr return ( setActiveViewParam(v)} /> } - headerClassName="px-2" + headerClassName="px-4" > -
+
{ const comparisonRunIds = useMemo(() => runIds.slice(1), [runIds]) return ( -
+
-
+
{baseRunId ? ( - {runDisplayName} -
- } - > -
-
{renderContent()}
-
- +
+
{renderContent()}
+
) } diff --git a/web/oss/src/components/PageLayout/PageLayout.tsx b/web/oss/src/components/PageLayout/PageLayout.tsx index dfca3c961..f3437c330 100644 --- a/web/oss/src/components/PageLayout/PageLayout.tsx +++ b/web/oss/src/components/PageLayout/PageLayout.tsx @@ -5,6 +5,7 @@ import classNames from "classnames" interface PageLayoutProps { title?: ReactNode + titleLevel?: 1 | 2 | 3 | 4 | 5 headerTabs?: ReactNode headerTabsProps?: TabsProps children: ReactNode @@ -14,6 +15,7 @@ interface PageLayoutProps { const PageLayout = ({ title, + titleLevel = 5, headerTabs, headerTabsProps, children, @@ -35,7 +37,7 @@ const PageLayout = ({ headerClassName, )} > - + {title} {headerTabsContent ? ( diff --git a/web/oss/src/styles/evaluations.css b/web/oss/src/styles/evaluations.css index 8df14e725..44a4ef786 100644 --- a/web/oss/src/styles/evaluations.css +++ b/web/oss/src/styles/evaluations.css @@ -139,7 +139,6 @@ .metadata-summary-table .ant-table, .metadata-summary-table .ant-table-container, .metadata-summary-table .ant-table-content { - border: none !important; box-shadow: none !important; } @@ -161,6 +160,11 @@ border-bottom: none; } +.metadata-summary-table .ant-tag { + margin-inline-end: 0; + margin-bottom: 0; +} + .agenta-scenario-table .ant-table-tbody > tr > td { padding: 0 !important; vertical-align: top; From b0e990447faea1c55dfc8a39eb9f85c15d64ffb2 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 31 Dec 2025 15:16:16 +0100 Subject: [PATCH 3/7] refactor: enhance layout and add typography to AggregatedOverviewSection and MetadataSummaryTable --- .../components/AggregatedOverviewSection.tsx | 24 ++-- .../components/MetadataSummaryTable.tsx | 118 ++++++++++-------- 2 files changed, 82 insertions(+), 60 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/AggregatedOverviewSection.tsx b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/AggregatedOverviewSection.tsx index 13dd92230..d0e8a8cfa 100644 --- a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/AggregatedOverviewSection.tsx +++ b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/AggregatedOverviewSection.tsx @@ -1,6 +1,6 @@ import {memo, useMemo} from "react" -import {Card} from "antd" +import {Card, Typography} from "antd" import useURL from "@/oss/hooks/useURL" @@ -19,14 +19,24 @@ const AggregatedOverviewSection = ({runIds}: AggregatedOverviewSectionProps) => } return ( - +
-
-
- +
+
+ + Evaluator Scores Overview + + + Average evaluator score across evaluations +
-
- +
+
+ +
+
+ +
diff --git a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/MetadataSummaryTable.tsx b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/MetadataSummaryTable.tsx index 0ca7cdf8d..4dd9f9684 100644 --- a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/MetadataSummaryTable.tsx +++ b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/MetadataSummaryTable.tsx @@ -10,7 +10,6 @@ import useEvaluatorReference from "@/oss/components/References/hooks/useEvaluato import type {BasicStats} from "@/oss/lib/metricUtils" import {useProjectData} from "@/oss/state/project" -import {getComparisonColor} from "../../../../atoms/compare" import {evaluationQueryRevisionAtomFamily} from "../../../../atoms/query" import { runCreatedAtAtomFamily, @@ -206,14 +205,19 @@ const StatusCell = ({runId}: MetadataCellProps) => { } const ApplicationCell = ({runId, projectURL}: MetadataCellProps) => ( -
- +
+
) const LegacyVariantCell = memo(({runId}: MetadataCellProps) => ( -
- +
+
)) @@ -235,8 +239,10 @@ const MetadataRunNameCell = memo( runId ?? "—" const accent = - accentColor ?? - (typeof runData?.accentColor === "string" ? (runData as any).accentColor : null) + accentColor === null + ? null + : accentColor ?? + (typeof runData?.accentColor === "string" ? (runData as any).accentColor : null) return (
@@ -248,7 +254,18 @@ const MetadataRunNameCell = memo( const LegacyTestsetsCell = memo(({runId, projectURL}: MetadataCellProps) => { const testsetAtom = useMemo(() => runTestsetIdsAtomFamily(runId), [runId]) const testsetIds = useAtomValueWithSchedule(testsetAtom, {priority: LOW_PRIORITY}) ?? [] - return + return ( +
+ +
+ ) }) const formatCurrency = (value: number | undefined | null) => { @@ -362,7 +379,14 @@ const InvocationErrorsCell = makeMetricCell("attributes.ag.metrics.errors.cumula }) const METADATA_ROWS: MetadataRowRecord[] = [ - {key: "evaluations", label: "Evaluations", Cell: MetadataRunNameCell}, + { + key: "testsets", + label: "Test set", + Cell: LegacyTestsetsCell, + shouldDisplay: ({snapshots}) => + snapshots.some(({testsetIds}) => (testsetIds?.length ?? 0) > 0), + }, + {key: "evaluation", label: "Evaluation", Cell: MetadataRunNameCell}, {key: "status", label: "Status", Cell: StatusCell}, {key: "created", label: "Created at", Cell: CreatedCell}, {key: "updated", label: "Updated at", Cell: UpdatedCell}, @@ -400,13 +424,6 @@ const METADATA_ROWS: MetadataRowRecord[] = [ ) }), }, - { - key: "testsets", - label: "Test sets", - Cell: LegacyTestsetsCell, - shouldDisplay: ({snapshots}) => - snapshots.some(({testsetIds}) => (testsetIds?.length ?? 0) > 0), - }, // {key: "scenarios", label: "Scenarios evaluated", Cell: ScenarioCountCell}, {key: "invocation_cost", label: "Cost (Total)", Cell: InvocationCostCell}, {key: "invocation_duration", label: "Duration (Total)", Cell: InvocationDurationCell}, @@ -422,7 +439,7 @@ const EvaluatorNameLabel = ({evaluatorId}: {evaluatorId: string}) => { const MetadataSummaryTable = ({runIds, projectURL}: MetadataSummaryTableProps) => { const orderedRunIds = useMemo(() => runIds.filter((id): id is string => Boolean(id)), [runIds]) - const {metricSelections, runColorMap, runDescriptors} = useRunMetricData(orderedRunIds) + const {metricSelections, runDescriptors} = useRunMetricData(orderedRunIds) const runReferenceSnapshotsAtom = useMemo( () => atom((get) => @@ -605,8 +622,6 @@ const MetadataSummaryTable = ({runIds, projectURL}: MetadataSummaryTableProps) = return rows }, [anyHasQuery, evaluatorMetricRows, rowContext]) - const isComparison = orderedRunIds.length > 1 - const columns = useMemo>(() => { const baseColumn = { title: null, @@ -625,47 +640,44 @@ const MetadataSummaryTable = ({runIds, projectURL}: MetadataSummaryTableProps) = key: runId, width: 160, onCell: (record: MetadataRowRecord) => { - if (!isComparison || record.key === "query_config") { - return {} + if (record.key === "testsets") { + return index === 0 ? {colSpan: orderedRunIds.length} : {colSpan: 0} } - const tone = getComparisonColor(index) - return tone ? {style: {backgroundColor: tone}} : {} + return {} + }, + render: (_: unknown, record: MetadataRowRecord) => { + if (record.key === "testsets" && index !== 0) { + return null + } + return ( + + ) }, - render: (_: unknown, record: MetadataRowRecord) => ( - - ), })) return [baseColumn, ...runColumns] - }, [isComparison, orderedRunIds, projectURL, runColorMap, runNameMap]) + }, [orderedRunIds, projectURL, runNameMap]) return ( -
-
- Evaluator Scores Overview - - Average evaluator score across evaluations - -
-
-
- - className="metadata-summary-table" - rowKey="key" - size="small" - pagination={false} - columns={columns} - dataSource={dataSource} - scroll={{x: "max-content"}} - showHeader={false} - /> -
+
+
+ + className="metadata-summary-table" + rowKey="key" + size="small" + pagination={false} + columns={columns} + dataSource={dataSource} + scroll={{x: "max-content"}} + showHeader={false} + bordered={true} + />
) From e301624959ddffa1b5e3894019d910c39d954898 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 31 Dec 2025 15:16:51 +0100 Subject: [PATCH 4/7] refactor: enhance EvaluatorMetricsChart with delta computation and improved summary display --- .../EvaluatorMetricsChart/index.tsx | 242 +++++++++++++++--- 1 file changed, 203 insertions(+), 39 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx b/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx index a822f96bc..3405904dc 100644 --- a/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx +++ b/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx @@ -41,6 +41,38 @@ interface EvaluatorLabelProps { fallbackLabel: string } +type MetricDeltaTone = "positive" | "negative" | "neutral" + +interface MetricStripEntry { + key: string + label: string + color: string + value: number | null + displayValue: string + isMain: boolean + deltaText: string + deltaTone: MetricDeltaTone +} + +const getMainEvaluatorSeries = (entries: MetricStripEntry[]) => + entries.find((entry) => entry.isMain) ?? entries[0] + +const computeDeltaPercent = (current: number | null, baseline: number | null) => { + if (typeof current !== "number" || typeof baseline !== "number") return null + if (!Number.isFinite(current) || !Number.isFinite(baseline) || baseline === 0) return null + return ((current - baseline) / baseline) * 100 +} + +const formatDelta = (delta: number | null): {text: string; tone: MetricDeltaTone} => { + if (delta === null || !Number.isFinite(delta)) { + return {text: "-", tone: "neutral"} + } + const rounded = Math.round(delta) + if (rounded > 0) return {text: `+${rounded}%`, tone: "positive"} + if (rounded < 0) return {text: `${rounded}%`, tone: "negative"} + return {text: "0%", tone: "neutral"} +} + const EvaluatorMetricsChartTitle = memo( ({runId, evaluatorRef, fallbackLabel}: EvaluatorLabelProps) => { const evaluatorAtom = useMemo( @@ -243,25 +275,145 @@ const EvaluatorMetricsChart = ({ (isBooleanMetric && booleanChartData.length > 0) || hasCategoricalFrequency - const summaryValue = useMemo((): string | null => { - if (isBooleanMetric) { - const percentage = booleanHistogram.percentages.true - return Number.isFinite(percentage) ? `${percentage.toFixed(2)}%` : "—" - } - if (hasCategoricalFrequency && categoricalFrequencyData.length) { - return null + const comparisonBooleanPercentMap = useMemo(() => { + const map = new Map() + comparisonBooleanHistograms.forEach((entry) => { + if (Number.isFinite(entry.histogram.percentages.true)) { + map.set(entry.runId, entry.histogram.percentages.true) + } + }) + return map + }, [comparisonBooleanHistograms]) + + const summaryItems = useMemo(() => { + const baseValue = (() => { + if (!resolvedStats) return {value: null, displayValue: "—"} + if (isBooleanMetric) { + const percentage = booleanHistogram.percentages.true + return Number.isFinite(percentage) + ? {value: percentage, displayValue: `${percentage.toFixed(2)}%`} + : {value: null, displayValue: "—"} + } + if (hasCategoricalFrequency) { + return {value: null, displayValue: "—"} + } + if (typeof resolvedStats.mean === "number" && Number.isFinite(resolvedStats.mean)) { + return {value: resolvedStats.mean, displayValue: format3Sig(resolvedStats.mean)} + } + return {value: null, displayValue: "—"} + })() + + const baseEntry: MetricStripEntry = { + key: baseSeriesKey, + label: resolvedRunName, + color: resolvedBaseColor, + value: baseValue.value, + displayValue: baseValue.displayValue, + isMain: true, + deltaText: "-", + deltaTone: "neutral", } - if (typeof stats.mean === "number") return format3Sig(stats.mean) - return "—" + + const comparisonEntries = comparisonSeries.map((entry) => { + const statsValue = entry.stats + if (!statsValue) { + return { + key: entry.runId, + label: entry.runName, + color: entry.color, + value: null, + displayValue: "—", + isMain: false, + deltaText: "-", + deltaTone: "neutral", + } + } + if (isBooleanMetric) { + const percentage = comparisonBooleanPercentMap.get(entry.runId) + return { + key: entry.runId, + label: entry.runName, + color: entry.color, + value: typeof percentage === "number" ? percentage : null, + displayValue: + typeof percentage === "number" && Number.isFinite(percentage) + ? `${percentage.toFixed(2)}%` + : "—", + isMain: false, + deltaText: "-", + deltaTone: "neutral", + } + } + if (hasCategoricalFrequency) { + return { + key: entry.runId, + label: entry.runName, + color: entry.color, + value: null, + displayValue: "—", + isMain: false, + deltaText: "-", + deltaTone: "neutral", + } + } + if (typeof statsValue.mean === "number" && Number.isFinite(statsValue.mean)) { + return { + key: entry.runId, + label: entry.runName, + color: entry.color, + value: statsValue.mean, + displayValue: format3Sig(statsValue.mean), + isMain: false, + deltaText: "-", + deltaTone: "neutral", + } + } + return { + key: entry.runId, + label: entry.runName, + color: entry.color, + value: null, + displayValue: "—", + isMain: false, + deltaText: "-", + deltaTone: "neutral", + } + }) + + const entries = [baseEntry, ...comparisonEntries] + const mainSeries = getMainEvaluatorSeries(entries) + + return entries.map((entry) => { + if (entry.isMain) { + return entry + } + const delta = computeDeltaPercent(entry.value, mainSeries?.value ?? null) + const formatted = formatDelta(delta) + return { + ...entry, + deltaText: formatted.text, + deltaTone: formatted.tone, + } + }) }, [ + baseSeriesKey, booleanHistogram.percentages.true, - categoricalFrequencyData, - effectiveScenarioCount, + comparisonBooleanPercentMap, + comparisonSeries, hasCategoricalFrequency, isBooleanMetric, - stats, + resolvedBaseColor, + resolvedRunName, + resolvedStats, ]) + const metricsGridClass = useMemo(() => { + if (summaryItems.length <= 1) return "grid-cols-1" + if (summaryItems.length === 2) return "grid-cols-2" + if (summaryItems.length === 3) return "grid-cols-3" + return "grid-cols-2 sm:grid-cols-4" + }, [summaryItems.length]) + const chartContent = () => { if (isBooleanMetric) { if (!booleanChartData.length) { @@ -443,10 +595,11 @@ const EvaluatorMetricsChart = ({ return ( + > +
+
- } - > -
- {stableComparisons.length === 0 && ( -
- {summaryValue !== null ? ( - - {summaryValue} - - ) : null} +
+
+ {summaryItems.map((entry) => ( +
+ + {entry.displayValue} + + + {entry.deltaText} + +
+ ))} +
+
+
+
+
+ {isLoading ? ( + + ) : hasError && !resolvedStats ? ( +
+ Unable to load metric data. +
+ ) : ( + chartContent() + )}
- )} -
0 ? "h-[370px]" : "h-[300px]"}> - {isLoading ? ( - - ) : hasError && !resolvedStats ? ( -
- Unable to load metric data. -
- ) : ( - chartContent() - )}
From d73c4e52dcc3411faf129ee11e79e1640e50b4ca Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 31 Dec 2025 15:17:00 +0100 Subject: [PATCH 5/7] refactor: add toneOverride and showIconOverride props to reference label components --- .../references/EvalReferenceLabels.tsx | 31 ++++++++++++++++ .../components/References/ReferenceLabels.tsx | 35 ++++++++++++++++--- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/references/EvalReferenceLabels.tsx b/web/oss/src/components/EvalRunDetails/components/references/EvalReferenceLabels.tsx index e611ed6bb..c84e88e31 100644 --- a/web/oss/src/components/EvalRunDetails/components/references/EvalReferenceLabels.tsx +++ b/web/oss/src/components/EvalRunDetails/components/references/EvalReferenceLabels.tsx @@ -15,6 +15,7 @@ import { VariantReferenceText as GenericVariantReferenceText, VariantRevisionLabel as GenericVariantRevisionLabel, } from "@/oss/components/References" +import type {ReferenceTone} from "@/oss/components/References/referenceColors" import {variantReferenceQueryAtomFamily} from "../../atoms/references" import {effectiveProjectIdAtom} from "../../atoms/run" @@ -30,10 +31,14 @@ export const TestsetTag = memo( testsetId, projectURL, runId, + toneOverride, + showIconOverride, }: { testsetId: string projectURL?: string | null runId?: string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const projectId = useAtomValue(effectiveProjectIdAtom) const {buildTestsetHref} = useRunScopedUrls(runId) @@ -44,6 +49,8 @@ export const TestsetTag = memo( testsetId={testsetId} projectId={projectId} projectURL={href ? undefined : projectURL} + toneOverride={toneOverride} + showIconOverride={showIconOverride} /> ) }, @@ -59,11 +66,15 @@ export const TestsetTagList = memo( projectURL, runId, className, + toneOverride, + showIconOverride, }: { ids: string[] projectURL?: string | null runId?: string | null className?: string + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const projectId = useAtomValue(effectiveProjectIdAtom) const {buildTestsetHref} = useRunScopedUrls(runId) @@ -78,6 +89,8 @@ export const TestsetTagList = memo( projectId={projectId} projectURL={resolvedProjectURL ?? projectURL} className={className} + toneOverride={toneOverride} + showIconOverride={showIconOverride} /> ) }, @@ -92,10 +105,14 @@ export const ApplicationReferenceLabel = memo( runId, applicationId: explicitApplicationId, projectURL: explicitProjectURL, + toneOverride, + showIconOverride, }: { runId?: string | null applicationId?: string | null projectURL?: string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const projectId = useAtomValue(effectiveProjectIdAtom) const {applicationId: runApplicationId} = useRunIdentifiers(runId) @@ -112,6 +129,8 @@ export const ApplicationReferenceLabel = memo( projectId={projectId} projectURL={explicitProjectURL ?? scopedProjectURL} href={appDetailHref} + toneOverride={toneOverride} + showIconOverride={showIconOverride} /> ) }, @@ -129,6 +148,8 @@ export const VariantReferenceLabel = memo( fallbackLabel, showVersionPill = false, explicitVersion, + toneOverride, + showIconOverride, }: { variantId?: string | null applicationId?: string | null @@ -136,6 +157,8 @@ export const VariantReferenceLabel = memo( fallbackLabel?: string | null showVersionPill?: boolean explicitVersion?: number | string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const projectId = useAtomValue(effectiveProjectIdAtom) const {variantId: runVariantId, applicationId: runApplicationId} = useRunIdentifiers(runId) @@ -153,6 +176,8 @@ export const VariantReferenceLabel = memo( showVersionPill={showVersionPill} explicitVersion={explicitVersion} href={href} + toneOverride={toneOverride} + showIconOverride={showIconOverride} /> ) }, @@ -172,6 +197,8 @@ export const VariantRevisionLabel = memo( runId, fallbackVariantName, fallbackRevision, + toneOverride, + showIconOverride, }: { variantId?: string | null revisionId?: string | null @@ -179,6 +206,8 @@ export const VariantRevisionLabel = memo( runId?: string | null fallbackVariantName?: string | null fallbackRevision?: number | string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const projectId = useAtomValue(effectiveProjectIdAtom) const { @@ -235,6 +264,8 @@ export const VariantRevisionLabel = memo( fallbackVariantName={resolvedVariantName} fallbackRevision={resolvedRevision} href={href} + toneOverride={toneOverride} + showIconOverride={showIconOverride} /> ) }, diff --git a/web/oss/src/components/References/ReferenceLabels.tsx b/web/oss/src/components/References/ReferenceLabels.tsx index efae728f9..430e5160d 100644 --- a/web/oss/src/components/References/ReferenceLabels.tsx +++ b/web/oss/src/components/References/ReferenceLabels.tsx @@ -12,6 +12,7 @@ import { queryReferenceAtomFamily, variantConfigAtomFamily, } from "./atoms/entityReferences" +import type {ReferenceTone} from "./referenceColors" import ReferenceTag from "./ReferenceTag" const {Text} = Typography @@ -25,11 +26,15 @@ export const TestsetTag = memo( testsetId, projectId, projectURL, + toneOverride, + showIconOverride, openExternally = false, }: { testsetId: string projectId: string | null projectURL?: string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean openExternally?: boolean }) => { const queryAtom = useMemo( @@ -56,7 +61,8 @@ export const TestsetTag = memo( tooltip={isDeleted ? `Testset ${testsetId} was deleted` : label} copyValue={testsetId} className="max-w-[220px] w-fit" - tone="testset" + tone={toneOverride === null ? undefined : toneOverride ?? "testset"} + showIcon={showIconOverride ?? true} openExternally={openExternally} /> ) @@ -169,12 +175,16 @@ export const TestsetTagList = memo( projectId, projectURL, className, + toneOverride, + showIconOverride, openExternally = false, }: { ids: string[] projectId: string | null projectURL?: string | null className?: string + toneOverride?: ReferenceTone | null + showIconOverride?: boolean openExternally?: boolean }) => { if (!ids.length) { @@ -189,6 +199,8 @@ export const TestsetTagList = memo( testsetId={id} projectId={projectId} projectURL={projectURL} + toneOverride={toneOverride} + showIconOverride={showIconOverride} openExternally={openExternally} /> ))} @@ -209,6 +221,8 @@ export const ApplicationReferenceLabel = memo( href: explicitHref, openExternally = false, label: customLabel, + toneOverride, + showIconOverride, }: { applicationId: string | null projectId: string | null @@ -216,6 +230,8 @@ export const ApplicationReferenceLabel = memo( href?: string | null openExternally?: boolean label?: string + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const queryAtom = useMemo( () => appReferenceAtomFamily({projectId, appId: applicationId}), @@ -255,7 +271,8 @@ export const ApplicationReferenceLabel = memo( tooltip={isDeleted ? `Application ${applicationId} was deleted` : label} copyValue={applicationId ?? undefined} className="max-w-[220px] w-fit" - tone="app" + tone={toneOverride === null ? undefined : toneOverride ?? "app"} + showIcon={showIconOverride ?? true} openExternally={openExternally} /> ) @@ -277,6 +294,8 @@ export const VariantReferenceLabel = memo( href: explicitHref, openExternally = false, label: customLabel, + toneOverride, + showIconOverride, }: { revisionId?: string | null projectId: string | null @@ -286,6 +305,8 @@ export const VariantReferenceLabel = memo( href?: string | null openExternally?: boolean label?: string + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const queryAtom = useMemo( () => variantConfigAtomFamily({projectId, revisionId}), @@ -329,7 +350,8 @@ export const VariantReferenceLabel = memo( tooltip={isDeleted ? `Variant ${revisionId} was deleted` : label} copyValue={revisionId ?? undefined} className="max-w-[220px]" - tone="variant" + tone={toneOverride === null ? undefined : toneOverride ?? "variant"} + showIcon={showIconOverride ?? true} openExternally={openExternally} /> {showVersionPill && resolvedVersion ? ( @@ -355,6 +377,8 @@ export const VariantRevisionLabel = memo( fallbackVariantName, fallbackRevision, href: explicitHref, + toneOverride, + showIconOverride, }: { variantId?: string | null revisionId?: string | null @@ -362,6 +386,8 @@ export const VariantRevisionLabel = memo( fallbackVariantName?: string | null fallbackRevision?: number | string | null href?: string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { // Fetch variant config using revisionId to get revision number const configQueryAtom = useMemo( @@ -411,7 +437,8 @@ export const VariantRevisionLabel = memo( tooltip={isDeleted ? `Variant ${revisionId ?? variantId} was deleted` : label} copyValue={revisionId ?? variantId ?? undefined} className="max-w-[220px]" - tone="variant" + tone={toneOverride === null ? undefined : toneOverride ?? "variant"} + showIcon={showIconOverride ?? true} /> ) }, From 3ebb2e5362e6fe75730a9911889ea827f8f37ed9 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 31 Dec 2025 15:17:05 +0100 Subject: [PATCH 6/7] refactor: enhance PreviewEvalRunMeta with comparison functionality and improved UI elements --- .../components/PreviewEvalRunHeader.tsx | 115 +++++++++++++----- 1 file changed, 85 insertions(+), 30 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx b/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx index a3b75e74b..5e4046223 100644 --- a/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx +++ b/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx @@ -1,10 +1,11 @@ import {memo, useCallback, useMemo, useState} from "react" -import {Pause, Play} from "@phosphor-icons/react" +import {PushpinFilled} from "@ant-design/icons" +import {PauseIcon, PlayIcon, XCircleIcon} from "@phosphor-icons/react" import {useQueryClient} from "@tanstack/react-query" -import {Button, Space, Tabs, Tag, Tooltip} from "antd" +import {Button, Tabs, Tag, Tooltip, Typography} from "antd" import clsx from "clsx" -import {useAtomValue} from "jotai" +import {atom, useAtomValue, useSetAtom} from "jotai" import {message} from "@/oss/components/AppMessageContext" import dayjs from "@/oss/lib/helpers/dateTimeHelper/dayjs" @@ -12,6 +13,13 @@ import {invalidatePreviewRunCache} from "@/oss/lib/hooks/usePreviewEvaluations/a import {startSimpleEvaluation, stopSimpleEvaluation} from "@/oss/services/onlineEvaluations/api" import { + compareRunIdsAtom, + compareRunIdsWriteAtom, + getComparisonColor, + getComparisonSolidColor, +} from "../atoms/compare" +import { + runDisplayNameAtomFamily, runInvocationRefsAtomFamily, runTestsetIdsAtomFamily, runFlagsAtomFamily, @@ -21,15 +29,6 @@ import {previewEvalTypeAtom} from "../state/evalType" import CompareRunsMenu from "./CompareRunsMenu" -const statusColor = (status?: string | null) => { - if (!status) return "default" - const normalized = status.toLowerCase() - if (normalized.includes("success") || normalized.includes("completed")) return "green" - if (normalized.includes("fail") || normalized.includes("error")) return "red" - if (normalized.includes("running") || normalized.includes("queued")) return "blue" - return "default" -} - type ActiveView = "overview" | "focus" | "scenarios" | "configuration" const useOnlineEvaluationActions = (runId: string, projectId?: string | null) => { @@ -156,6 +155,30 @@ const PreviewEvalRunMeta = ({ const _testsetIds = useAtomValue(useMemo(() => runTestsetIdsAtomFamily(runId), [runId])) const {canStopOnline, handleOnlineAction, onlineAction, showOnlineAction} = useOnlineEvaluationActions(runId, projectId) + const compareRunIds = useAtomValue(compareRunIdsAtom) + const setCompareRunIds = useSetAtom(compareRunIdsWriteAtom) + + const orderedRunIds = useMemo(() => { + const ids = [runId, ...compareRunIds].filter((id): id is string => Boolean(id)) + const seen = new Set() + return ids.filter((id) => { + if (seen.has(id)) return false + seen.add(id) + return true + }) + }, [compareRunIds, runId]) + + const runDescriptorsAtom = useMemo( + () => + atom((get) => + orderedRunIds.map((id) => ({ + id, + name: get(runDisplayNameAtomFamily(id)), + })), + ), + [orderedRunIds], + ) + const runDescriptors = useAtomValue(runDescriptorsAtom) const runData = runQuery.data?.camelRun ?? runQuery.data?.rawRun ?? null const runStatus = runData?.status ?? null @@ -169,30 +192,62 @@ const PreviewEvalRunMeta = ({ const lastUpdated = updatedMoment?.isValid() ? updatedMoment.fromNow() : undefined return ( -
- - {runStatus ? ( - <> - - {runStatus} - - - ) : null} - {lastUpdated ? ( - - - Updated {lastUpdated} - - - ) : null} - +
+
+ Evaluations: +
+ {runDescriptors.map((run, index) => { + const isBaseRun = index === 0 + const tagColor = getComparisonSolidColor(index) + const tagBg = getComparisonColor(index) + return ( + + ) : undefined + } + closable={!isBaseRun} + closeIcon={ + !isBaseRun ? ( + + ) : undefined + } + onClose={ + !isBaseRun + ? (event) => { + event.preventDefault() + setCompareRunIds((prev) => + prev.filter((id) => id !== run.id), + ) + } + : undefined + } + > + {run.name} + + ) + })} +
+
+
{showOnlineAction ? (