From d7aac0ee70462b7977dfeef4f1533e7d3e657a86 Mon Sep 17 00:00:00 2001 From: shunjiazhu Date: Sun, 15 Feb 2026 21:05:47 +0800 Subject: [PATCH 1/8] upgrade adk-go to v0.5.x --- configs/configs.go | 3 +- configs/observability.go | 8 - go.mod | 10 +- go.sum | 18 +- observability/README.md | 1 + observability/README_zh.md | 3 +- observability/initialize.go | 113 +---- observability/initialize_test.go | 4 +- observability/metrics.go | 37 +- observability/metrics_test.go | 15 +- observability/plugin.go | 691 +------------------------------ observability/span_processor.go | 135 ++++++ observability/translator.go | 18 + 13 files changed, 231 insertions(+), 825 deletions(-) create mode 100644 observability/span_processor.go diff --git a/configs/configs.go b/configs/configs.go index c23da2e..2dd38f5 100644 --- a/configs/configs.go +++ b/configs/configs.go @@ -91,8 +91,7 @@ func SetupVeADKConfig() error { }, Observability: &ObservabilityConfig{ OpenTelemetry: &OpenTelemetryConfig{ - EnableGlobalProvider: true, // use global trace provider by default, like veadk-python - EnableLocalProvider: false, // disable adk-go's local provider + EnableGlobalProvider: true, // adk-go main path: use global provider by default }, }, } diff --git a/configs/observability.go b/configs/observability.go index 440effe..4fbdfe3 100644 --- a/configs/observability.go +++ b/configs/observability.go @@ -23,7 +23,6 @@ import ( const ( // Global EnvOtelServiceName = "OTEL_SERVICE_NAME" - EnvObservabilityEnableLocalProvider = "OBSERVABILITY_OPENTELEMETRY_ENABLE_LOCAL_PROVIDER" EnvObservabilityEnableGlobalProvider = "OBSERVABILITY_OPENTELEMETRY_ENABLE_GLOBAL_PROVIDER" EnvObservabilityEnableMetrics = "OBSERVABILITY_OPENTELEMETRY_ENABLE_METRICS" @@ -59,7 +58,6 @@ type ObservabilityConfig struct { } type OpenTelemetryConfig struct { - EnableLocalProvider bool `yaml:"enable_local_tracer"` EnableGlobalProvider bool `yaml:"enable_global_tracer"` EnableMetrics *bool `yaml:"enable_metrics"` @@ -236,11 +234,6 @@ 
func (c *ObservabilityConfig) MapEnvToConfig() { ot.EnableGlobalProvider = v == "true" } - // Local Tracer - if v := utils.GetEnvWithDefault(EnvObservabilityEnableLocalProvider); v != "" { - ot.EnableLocalProvider = v == "true" - } - // Meter Provider if v := utils.GetEnvWithDefault(EnvObservabilityEnableMetrics); v != "" { if ot.EnableMetrics == nil { @@ -266,7 +259,6 @@ func (c *OpenTelemetryConfig) Clone() *OpenTelemetryConfig { return &OpenTelemetryConfig{ EnableGlobalProvider: c.EnableGlobalProvider, - EnableLocalProvider: c.EnableLocalProvider, EnableMetrics: c.EnableMetrics, ApmPlus: c.ApmPlus.Clone(), CozeLoop: c.CozeLoop.Clone(), diff --git a/go.mod b/go.mod index 50f0024..1303d00 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( go.opentelemetry.io/otel/sdk/metric v1.39.0 go.opentelemetry.io/otel/trace v1.39.0 golang.org/x/oauth2 v0.32.0 - google.golang.org/adk v0.4.0 + google.golang.org/adk v0.4.1-0.20260213105330-147b75759ac6 google.golang.org/genai v1.40.0 gopkg.in/go-playground/validator.v8 v8.18.2 gopkg.in/yaml.v3 v3.0.1 @@ -41,6 +41,7 @@ require ( cloud.google.com/go v0.123.0 // indirect cloud.google.com/go/auth v0.17.0 // indirect cloud.google.com/go/compute/metadata v0.9.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 // indirect github.com/awalterschulze/gographviz v2.0.3+incompatible // indirect github.com/bluele/gcache v0.0.2 // indirect github.com/bytedance/sonic v1.14.2 // indirect @@ -49,7 +50,7 @@ require ( github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudwego/base64x v0.1.6 // indirect github.com/coze-dev/cozeloop-go/spec v0.1.4-0.20250829072213-3812ddbfb735 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.3 // indirect @@ -79,7 +80,7 @@ require ( 
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/nikolalohinski/gonja/v2 v2.3.1 // indirect github.com/pkg/errors v0.9.2-0.20201214064552-5dd12d0cfe7f // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/smarty/assertions v1.15.0 // indirect github.com/smartystreets/goconvey v1.8.1 // indirect @@ -90,6 +91,7 @@ require ( github.com/yosida95/uritemplate/v3 v3.0.2 // indirect go.mongodb.org/mongo-driver v1.17.6 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/detectors/gcp v1.38.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect go.opentelemetry.io/otel/log v0.14.0 // indirect @@ -99,7 +101,7 @@ require ( golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0 // indirect golang.org/x/net v0.47.0 // indirect golang.org/x/sync v0.18.0 // indirect - golang.org/x/sys v0.39.0 // indirect + golang.org/x/sys v0.40.0 // indirect golang.org/x/text v0.31.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect diff --git a/go.sum b/go.sum index cd6a2cb..dfad47f 100644 --- a/go.sum +++ b/go.sum @@ -6,6 +6,8 @@ cloud.google.com/go/auth v0.17.0/go.mod h1:6wv/t5/6rOPAX4fJiRjKkJCvswLwdet7G8+UG cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 h1:sBEjpZlNHzK1voKq9695PJSX2o5NEXl7/OL3coiIY0c= 
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/a2aproject/a2a-go v0.3.3 h1:NqGDw2c8hCSW3/9MakeeRpw5yCZUUmW2Y/yINV15GwQ= @@ -35,8 +37,9 @@ github.com/coze-dev/cozeloop-go/spec v0.1.4-0.20250829072213-3812ddbfb735 h1:qxA github.com/coze-dev/cozeloop-go/spec v0.1.4-0.20250829072213-3812ddbfb735/go.mod h1:/f3BrWehffwXIpd4b5rYIqktLd/v5dlLBw0h9F/LQIU= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -154,8 +157,9 @@ github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJK github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.2-0.20201214064552-5dd12d0cfe7f h1:lJqhwddJVYAkyp72a4pwzMClI20xTwL7miDdm2W/KBM= github.com/pkg/errors v0.9.2-0.20201214064552-5dd12d0cfe7f/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= @@ -199,6 +203,8 @@ go.mongodb.org/mongo-driver v1.17.6 h1:87JUG1wZfWsr6rIz3ZmpH90rL5tea7O3IHuSwHUps go.mongodb.org/mongo-driver v1.17.6/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/detectors/gcp v1.38.0 h1:ZoYbqX7OaA/TAikspPl3ozPI6iY6LiIY9I8cUfm+pJs= +go.opentelemetry.io/contrib/detectors/gcp v1.38.0/go.mod h1:SU+iU7nu5ud4oCb3LQOhIZ3nRLj6FNVrKgtflbaf2ts= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= @@ -269,8 +275,8 @@ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= -golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= +golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= @@ -286,8 +292,8 @@ golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/adk v0.4.0 h1:CJ31nyxkqRfEgKuttR4h3o6QFok94Ty4UpbefUn21h8= -google.golang.org/adk v0.4.0/go.mod h1:jVeb7Ir53+3XKTncdY7k3pVdPneKcm5+60sXpxHQnao= +google.golang.org/adk v0.4.1-0.20260213105330-147b75759ac6 h1:lrH4NZMy+Mw9mhDSR0wMAnNkUGQMfaW3DIoLRXfGl2c= +google.golang.org/adk v0.4.1-0.20260213105330-147b75759ac6/go.mod h1:K9gcJhkWBF2NcLvLh/oq4BM7QwPswwSX/gPqukLpgwQ= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genai v1.40.0 h1:kYxyQSH+vsib8dvsgyLJzsVEIv5k3ZmHJyVqdvGncmc= diff --git a/observability/README.md b/observability/README.md index 9696dcc..2086bed 100644 --- a/observability/README.md +++ b/observability/README.md @@ -62,6 +62,7 @@ Add an `observability` section to your `config.yaml`: ```yaml observability: opentelemetry: + enable_global_tracer: true apmplus: endpoint: "https://apmplus-cn-beijing.volces.com:4318" api_key: 
"YOUR_APMPLUS_API_KEY" diff --git a/observability/README_zh.md b/observability/README_zh.md index ec1d696..fcca5a3 100644 --- a/observability/README_zh.md +++ b/observability/README_zh.md @@ -62,6 +62,7 @@ VeADK Go 实现了以下 Span 属性类别,详见 [Python ADK Span 属性文 ```yaml observability: opentelemetry: + enable_global_tracer: true apmplus: endpoint: "https://apmplus-cn-beijing.volces.com:4318" api_key: "YOUR_APMPLUS_API_KEY" @@ -74,7 +75,7 @@ observability: - `OBSERVABILITY_OPENTELEMETRY_COZELOOP_API_KEY` - `OBSERVABILITY_OPENTELEMETRY_APMPLUS_API_KEY` -- `OBSERVABILITY_OPENTELEMETRY_ENABLE_GLOBAL_PROVIDER` (默认: false) +- `OBSERVABILITY_OPENTELEMETRY_ENABLE_GLOBAL_PROVIDER` (默认: true) - `VEADK_MODEL_PROVIDER` - 设置模型提供商 ## 使用方法 diff --git a/observability/initialize.go b/observability/initialize.go index 090bd71..334797a 100644 --- a/observability/initialize.go +++ b/observability/initialize.go @@ -21,7 +21,6 @@ import ( "github.com/volcengine/veadk-go/configs" "github.com/volcengine/veadk-go/log" - "google.golang.org/adk/telemetry" "go.opentelemetry.io/otel" sdktrace "go.opentelemetry.io/otel/sdk/trace" @@ -87,15 +86,8 @@ func Shutdown(ctx context.Context) error { } // 2. Shutdown local MeterProvider if exists - if localMeterProvider != nil { - if err := localMeterProvider.Shutdown(ctx); err != nil { - errs = append(errs, err) - } - } - - // 3. Shutdown global MeterProvider if exists - if globalMeterProvider != nil { - if err := globalMeterProvider.Shutdown(ctx); err != nil { + if meterProvider != nil { + if err := meterProvider.Shutdown(ctx); err != nil { errs = append(errs, err) } } @@ -132,25 +124,6 @@ func newVeadkExporter(exp sdktrace.SpanExporter) sdktrace.SpanExporter { return &VeADKTranslatedExporter{SpanExporter: exp} } -// AddSpanExporter registers an exporter to Google ADK's local telemetry. 
-func AddSpanExporter(exp sdktrace.SpanExporter) { - veExp := newVeadkExporter(exp) - if veExp != nil { - telemetry.RegisterSpanProcessor(sdktrace.NewBatchSpanProcessor(veExp)) - } -} - -// AddGlobalSpanExporter registers an exporter toglobal TracerProvider. -func AddGlobalSpanExporter(exp sdktrace.SpanExporter) { - veExp := newVeadkExporter(exp) - if veExp != nil { - globalTP := otel.GetTracerProvider() - if sdkTP, ok := globalTP.(*sdktrace.TracerProvider); ok { - sdkTP.RegisterSpanProcessor(sdktrace.NewBatchSpanProcessor(veExp)) - } - } -} - // setGlobalTracerProvider configures the global OpenTelemetry TracerProvider. func setGlobalTracerProvider(exp sdktrace.SpanExporter, spanProcessors ...sdktrace.SpanProcessor) { // Always wrap with VeADKTranslatedExporter to ensure ADK-internal spans are correctly mapped @@ -160,7 +133,7 @@ func setGlobalTracerProvider(exp sdktrace.SpanExporter, spanProcessors ...sdktra } // Default processors - allProcessors := append([]sdktrace.SpanProcessor{}, spanProcessors...) + allProcessors := append([]sdktrace.SpanProcessor{NewVeADKSpanProcessor()}, spanProcessors...) // Use BatchSpanProcessor for all exporters to ensure performance and batching. 
finalProcessor := sdktrace.NewBatchSpanProcessor(translatedExp) @@ -190,11 +163,15 @@ func setGlobalTracerProvider(exp sdktrace.SpanExporter, spanProcessors ...sdktra otel.SetTracerProvider(tp) } -func setupLocalTracer(ctx context.Context, cfg *configs.OpenTelemetryConfig) (bool, error) { +func initializeTraceProvider(ctx context.Context, cfg *configs.OpenTelemetryConfig) (bool, error) { if cfg == nil { return false, nil } + if !cfg.EnableGlobalProvider { + return false, nil + } + exp, err := NewMultiExporter(ctx, cfg) if err != nil { return false, err @@ -204,77 +181,29 @@ func setupLocalTracer(ctx context.Context, cfg *configs.OpenTelemetryConfig) (bo return false, nil } - AddSpanExporter(exp) + setGlobalTracerProvider(exp) return true, nil } -func setupGlobalTracer(ctx context.Context, cfg *configs.OpenTelemetryConfig) (bool, error) { - globalExp, err := NewMultiExporter(ctx, cfg) - if err != nil { - return false, err - } - - if globalExp != nil { - log.Info("Registering ADK Global TracerProvider") - setGlobalTracerProvider(globalExp) - return true, nil - } - return false, nil -} - -func initializeTraceProvider(ctx context.Context, cfg *configs.OpenTelemetryConfig) (bool, error) { - var errs []error - var initialized bool - if cfg != nil && cfg.EnableLocalProvider { - ok, err := setupLocalTracer(ctx, cfg) - if err != nil { - errs = append(errs, err) - } - if ok { - initialized = true - } - } - - if cfg != nil && cfg.EnableGlobalProvider { - ok, err := setupGlobalTracer(ctx, cfg) - if err != nil { - errs = append(errs, err) - } - if ok { - initialized = true - } - } - return initialized, errors.Join(errs...) 
-} - func initializeMeterProvider(ctx context.Context, cfg *configs.OpenTelemetryConfig) (bool, error) { - var errs []error - var initialized bool if cfg == nil || cfg.EnableMetrics == nil || !*cfg.EnableMetrics { log.Debug("Meter provider is not enabled") return false, nil } - if cfg.EnableLocalProvider { - readers, err := NewMetricReader(ctx, cfg) - if err != nil { - errs = append(errs, err) - } - if len(readers) > 0 { - registerLocalMetrics(readers) - initialized = true - } + if !cfg.EnableGlobalProvider { + return false, nil } - if cfg.EnableGlobalProvider { - globalReaders, err := NewMetricReader(ctx, cfg) - if err != nil { - errs = append(errs, err) - } - if len(globalReaders) > 0 { - registerGlobalMetrics(globalReaders) - initialized = true - } + readers, err := NewMetricReader(ctx, cfg) + if err != nil { + return false, err + } + + if len(readers) == 0 { + return false, nil } - return initialized, errors.Join(errs...) + + registerMetrics(readers) + return true, nil } diff --git a/observability/initialize_test.go b/observability/initialize_test.go index b1f89e2..f13fb23 100644 --- a/observability/initialize_test.go +++ b/observability/initialize_test.go @@ -88,11 +88,9 @@ func TestInitializeWithConfig(t *testing.T) { err := initWithConfig(context.Background(), nil) assert.ErrorIs(t, err, ErrNoExporters) - // Config with disabled global provider but valid exporter, should return ErrNoExporters - // because neither local nor global provider is enabled. + // Config with disabled global provider but valid exporter, should return ErrNoExporters. 
cfg := &configs.OpenTelemetryConfig{ EnableGlobalProvider: false, - EnableLocalProvider: false, Stdout: &configs.StdoutConfig{Enable: true}, } err = initWithConfig(context.Background(), cfg) diff --git a/observability/metrics.go b/observability/metrics.go index 20586b4..6617e82 100644 --- a/observability/metrics.go +++ b/observability/metrics.go @@ -53,12 +53,10 @@ var ( ) var ( - // Slices to hold instruments from multiple providers (Global, Local, etc.) - localOnce sync.Once - globalOnce sync.Once - instrumentsMu sync.RWMutex - localMeterProvider *sdkmetric.MeterProvider - globalMeterProvider *sdkmetric.MeterProvider + // Shared instruments bound to a single global meter provider. + meterOnce sync.Once + instrumentsMu sync.RWMutex + meterProvider *sdkmetric.MeterProvider // Standard Gen AI Metrics tokenUsageHistograms []metric.Float64Histogram @@ -78,35 +76,18 @@ var ( agentkitDurationHistograms []metric.Float64Histogram ) -// registerLocalMetrics initializes the metrics system with a local isolated MeterProvider. -// It does NOT overwrite the global OTel MeterProvider. -func registerLocalMetrics(readers []sdkmetric.Reader) { - localOnce.Do(func() { +// registerMetrics configures a single global OpenTelemetry MeterProvider. +func registerMetrics(readers []sdkmetric.Reader) { + meterOnce.Do(func() { options := []sdkmetric.Option{} for _, r := range readers { options = append(options, sdkmetric.WithReader(r)) } mp := sdkmetric.NewMeterProvider(options...) - localMeterProvider = mp - initializeInstruments(mp.Meter(InstrumentationName)) - }) -} - -// registerGlobalMetrics configures the global OpenTelemetry MeterProvider with the provided readers. -// This is optional and used when you want unrelated OTel measurements to also be exported. 
-func registerGlobalMetrics(readers []sdkmetric.Reader) { - globalOnce.Do(func() { - options := []sdkmetric.Option{} - for _, r := range readers { - options = append(options, sdkmetric.WithReader(r)) - } - - mp := sdkmetric.NewMeterProvider(options...) - globalMeterProvider = mp + meterProvider = mp otel.SetMeterProvider(mp) - // No need to call registerMeter here, because the global proxy registered in init() - initializeInstruments(otel.GetMeterProvider().Meter(InstrumentationName)) + initializeInstruments(mp.Meter(InstrumentationName)) }) } diff --git a/observability/metrics_test.go b/observability/metrics_test.go index 815d94f..256095c 100644 --- a/observability/metrics_test.go +++ b/observability/metrics_test.go @@ -144,20 +144,9 @@ func TestMetricsRecording(t *testing.T) { }) } -func TestRegisterLocalMetrics(t *testing.T) { - // Since registerLocalMetrics uses sync.Once, we can only test it doesn't panic. - // Logic verification is implicitly done via InitializeInstruments testing above. 
+func TestRegisterMetrics(t *testing.T) { reader := sdkmetric.NewManualReader() assert.NotPanics(t, func() { - registerLocalMetrics([]sdkmetric.Reader{reader}) - }) -} - -// We cannot easily test registerGlobalMetrics side effects on otel.GetMeterProvider -// without affecting other tests or global state, but basic execution safety check: -func TestRegisterGlobalMetrics(t *testing.T) { - reader := sdkmetric.NewManualReader() - assert.NotPanics(t, func() { - registerGlobalMetrics([]sdkmetric.Reader{reader}) + registerMetrics([]sdkmetric.Reader{reader}) }) } diff --git a/observability/plugin.go b/observability/plugin.go index 93d7d50..fd7a659 100644 --- a/observability/plugin.go +++ b/observability/plugin.go @@ -17,16 +17,12 @@ package observability import ( "context" "encoding/json" - "fmt" - "strconv" - "strings" "time" "github.com/volcengine/veadk-go/configs" "github.com/volcengine/veadk-go/log" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/codes" "go.opentelemetry.io/otel/trace" "google.golang.org/adk/agent" "google.golang.org/adk/model" @@ -111,7 +107,7 @@ func (p *adkObservabilityPlugin) isMetricsEnabled() bool { func (p *adkObservabilityPlugin) BeforeRun(ctx agent.InvocationContext) (*genai.Content, error) { log.Debug("Before Run", "InvocationID", ctx.InvocationID(), "SessionID", ctx.Session().ID(), "UserID", ctx.Session().UserID()) // 1. Start the 'invocation' span - newCtx, span := p.tracer.Start(context.Context(ctx), SpanInvocation, trace.WithSpanKind(trace.SpanKindServer)) + _, span := p.tracer.Start(context.Context(ctx), SpanInvocation, trace.WithSpanKind(trace.SpanKindServer)) log.Debug("BeforeRun created a new invocation span", "span", span.SpanContext()) // Register internal ADK run span ID -> our veadk invocation span context. @@ -120,9 +116,8 @@ func (p *adkObservabilityPlugin) BeforeRun(ctx agent.InvocationContext) (*genai. 
GetRegistry().RegisterRunMapping(adkSpan.SpanContext().SpanID(), adkSpan.SpanContext().TraceID(), span.SpanContext(), span) } - // 2. Store in state for AfterRun and children + // 2. Store in state for AfterRun _ = ctx.Session().State().Set(stateKeyInvocationSpan, span) - _ = ctx.Session().State().Set(stateKeyInvocationCtx, newCtx) setCommonAttributesFromInvocation(ctx, span) setWorkflowAttributes(span) @@ -230,29 +225,7 @@ func (p *adkObservabilityPlugin) AfterRun(ctx agent.InvocationContext) { func (p *adkObservabilityPlugin) BeforeModel(ctx agent.CallbackContext, req *model.LLMRequest) (*model.LLMResponse, error) { log.Debug("BeforeModel", "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) - parentCtx := context.Context(ctx) - - if actx, _ := ctx.State().Get(stateKeyInvokeAgentCtx); actx != nil { - parentCtx = actx.(context.Context) - log.Debug("BeforeModel get a parent invoke_agent ctx from state", "parentCtx", parentCtx) - } else if ictx, _ := ctx.State().Get(stateKeyInvocationCtx); ictx != nil { - parentCtx = ictx.(context.Context) - log.Debug("BeforeModel get a parent invocation ctx from state", "parentCtx", parentCtx) - } - - // 2. Start our OWN span to cover the full duration of the call (including streaming). - // ADK's "call_llm" span will be closed prematurely by the framework on the first chunk. - // Align with Python: name is "call_llm" - _, span := p.tracer.Start(parentCtx, SpanCallLLM) - log.Debug("BeforeModel created a span", "span", span.SpanContext(), "is_recording", span.IsRecording()) - _ = ctx.State().Set(stateKeyStreamingSpan, span) - - adkSpan := trace.SpanFromContext(context.Context(ctx)) - if adkSpan.SpanContext().IsValid() { // Register google's ADK span (currently not implemented) -> our veadk span context. 
- GetRegistry().RegisterLLMMapping(adkSpan.SpanContext().SpanID(), adkSpan.SpanContext().TraceID(), span.SpanContext()) - } - - // Group metadata in a single structure for state storage + // New ADK emits model/tool spans natively. Plugin only keeps metadata for metrics and invocation aggregation. meta := p.getSpanMetadata(ctx.State()) meta.StartTime = time.Now() meta.PrevPromptTokens = meta.PromptTokens @@ -260,125 +233,23 @@ func (p *adkObservabilityPlugin) BeforeModel(ctx agent.CallbackContext, req *mod meta.PrevTotalTokens = meta.TotalTokens meta.ModelName = req.Model p.storeSpanMetadata(ctx.State(), meta) - - // Link back to the ADK internal span if it's there. - // This records the ID of the span started by the ADK framework, which we - // often bypass to maintain a cleaner hierarchy in our veadk spans. - adkSpan = trace.SpanFromContext(context.Context(ctx)) - if adkSpan.SpanContext().IsValid() { - span.SetAttributes(attribute.String("adk.internal_span_id", adkSpan.SpanContext().SpanID().String())) - } - - setCommonAttributesFromCallback(ctx, span) - // Set GenAI standard span attributes - setLLMAttributes(span) - - // Record request attributes - p.setLLMRequestAttributes(ctx, span, req) - - // Capture messages in GenAI format for the span - messages := p.extractMessages(req) - var msgAttrs []attribute.KeyValue - messagesJSON, err := json.Marshal(messages) - if err == nil { - msgAttrs = append(msgAttrs, attribute.String(AttrGenAIMessages, string(messagesJSON))) - } - - // Flatten messages for gen_ai.prompt.[n] attributes (alignment with python) - msgAttrs = append(msgAttrs, p.flattenPrompt(messages)...) - - // Add input.value (standard for some collectors) - msgAttrs = append(msgAttrs, attribute.String(AttrGenAIInput, string(messagesJSON))) - - msgAttrs = append(msgAttrs, attribute.String(AttrInputValue, string(messagesJSON))) - - span.SetAttributes(msgAttrs...) 
- - // Add gen_ai.messages events (system, user, tool, assistant) aligned with Python - p.addMessageEvents(span, ctx, req) - - // Add gen_ai.content.prompt event (OTEL GenAI convention) - span.AddEvent(EventGenAIContentPrompt, trace.WithAttributes( - attribute.String(AttrGenAIPrompt, string(messagesJSON)), - attribute.String(AttrGenAIInput, string(messagesJSON)), - )) - return nil, nil } -func (p *adkObservabilityPlugin) setLLMRequestAttributes(ctx agent.CallbackContext, span trace.Span, req *model.LLMRequest) { - attrs := []attribute.KeyValue{ - attribute.String(AttrGenAIRequestModel, req.Model), - attribute.String(AttrGenAIRequestType, "chat"), // Default to chat - attribute.String(AttrGenAISystem, GetModelProvider(context.Context(ctx))), - } - - if req.Config != nil { - if req.Config.Temperature != nil { - attrs = append(attrs, attribute.Float64(AttrGenAIRequestTemperature, float64(*req.Config.Temperature))) - } - if req.Config.TopP != nil { - attrs = append(attrs, attribute.Float64(AttrGenAIRequestTopP, float64(*req.Config.TopP))) - } - if req.Config.MaxOutputTokens > 0 { - attrs = append(attrs, attribute.Int64(AttrGenAIRequestMaxTokens, int64(req.Config.MaxOutputTokens))) - } - - funcIdx := 0 - for _, tool := range req.Config.Tools { - if tool.FunctionDeclarations != nil { - for _, fn := range tool.FunctionDeclarations { - prefix := fmt.Sprintf("gen_ai.request.functions.%d.", funcIdx) // Simplified indexing - attrs = append(attrs, attribute.String(prefix+"name", fn.Name)) - attrs = append(attrs, attribute.String(prefix+"description", fn.Description)) - if fn.Parameters != nil { - paramsJSON, _ := json.Marshal(fn.Parameters) - attrs = append(attrs, attribute.String(prefix+"parameters", string(paramsJSON))) - } - funcIdx++ - } - } - } - } - span.SetAttributes(attrs...) -} - // AfterModel is called after the LLM returns. 
func (p *adkObservabilityPlugin) AfterModel(ctx agent.CallbackContext, resp *model.LLMResponse, err error) (*model.LLMResponse, error) { log.Debug("AfterModel", "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) - // 1. Get our managed span from state - s, _ := ctx.State().Get(stateKeyStreamingSpan) - if s == nil { - log.Warn("AfterModel: No streaming span found in state") - return nil, nil - } - span := s.(trace.Span) - // log.Debug("AfterModel get a trace span from state", "span", span.SpanContext(), "type", fmt.Sprintf("%T", s), "is_recording", span.IsRecording()) - - // 2. Wrap the cleanup to ensure span is always ended on error or final chunk. - // ADK calls AfterModel for EVERY chunk in a stream. - // resp.Partial is true for intermediate chunks, false for the final one. - defer func() { - if err != nil || (resp != nil && !resp.Partial) { - if span.IsRecording() { - log.Debug("AfterModel got a partial response", "span", span.SpanContext()) - span.End() - } - } - }() + meta := p.getSpanMetadata(ctx.State()) if err != nil { - span.SetStatus(codes.Error, err.Error()) - // Record Exceptions metric if p.isMetricsEnabled() { - meta := p.getSpanMetadata(ctx.State()) metricAttrs := []attribute.KeyValue{ attribute.String(AttrGenAISystem, GetModelProvider(context.Context(ctx))), attribute.String("gen_ai_response_model", meta.ModelName), attribute.String("gen_ai_operation_name", "chat"), attribute.String("gen_ai_operation_type", "llm"), - attribute.String("error_type", "error"), // Simple error type + attribute.String("error_type", "error"), } RecordExceptions(context.Context(ctx), 1, metricAttrs...) 
} @@ -389,34 +260,29 @@ func (p *adkObservabilityPlugin) AfterModel(ctx agent.CallbackContext, resp *mod return nil, nil } - if !span.IsRecording() { - log.Warn("AfterModel: span is not recording", "span", span) - // Even if not recording, we should still accumulate content for metrics/logs - } - - // Record responding model - meta := p.getSpanMetadata(ctx.State()) - // Try to get confirmation from response metadata first (passed from sdk) - var finalModelName string + finalModelName := meta.ModelName if resp.CustomMetadata != nil { - if m, ok := resp.CustomMetadata["response_model"].(string); ok { + if m, ok := resp.CustomMetadata["response_model"].(string); ok && m != "" { finalModelName = m } } - // Fallback to request model name if not present in response - if finalModelName == "" { - finalModelName = meta.ModelName - } - if finalModelName != "" { - span.SetAttributes(attribute.String(AttrGenAIResponseModel, finalModelName)) - } if resp.UsageMetadata != nil { - p.handleUsage(ctx, span, resp, resp.Partial, finalModelName) + p.handleUsageWithoutSpan(ctx, resp, finalModelName) } - // Capture tool calls from response to link future tool spans if resp.Content != nil { + if !resp.Partial { + _ = ctx.State().Set(stateKeyStreamingOutput, resp.Content) + } + + parentSC := trace.SpanContext{} + if s, _ := ctx.State().Get(stateKeyInvocationSpan); s != nil { + if span, ok := s.(trace.Span); ok { + parentSC = span.SpanContext() + } + } + adkSpan := trace.SpanFromContext(context.Context(ctx)) adkTraceID := trace.TraceID{} if adkSpan.SpanContext().IsValid() { @@ -424,169 +290,19 @@ func (p *adkObservabilityPlugin) AfterModel(ctx agent.CallbackContext, resp *mod } for _, part := range resp.Content.Parts { - if part.FunctionCall != nil && part.FunctionCall.ID != "" { - log.Debug(" AfterModel, registering ToolCallID mapping", "tool_call_id", part.FunctionCall.ID, "parent_llm_span_id", span.SpanContext()) - GetRegistry().RegisterToolCallMapping(part.FunctionCall.ID, adkTraceID, 
span.SpanContext()) + if part.FunctionCall != nil && part.FunctionCall.ID != "" && parentSC.IsValid() { + GetRegistry().RegisterToolCallMapping(part.FunctionCall.ID, adkTraceID, parentSC) } } } - if resp.FinishReason != "" { - span.SetAttributes(attribute.String(AttrGenAIResponseFinishReason, string(resp.FinishReason))) - } - - // Record response content - var currentAcc *genai.Content - cached, _ := ctx.State().Get(stateKeyStreamingOutput) - if cached != nil { - currentAcc = cached.(*genai.Content) - } - - // --------------------------------------------------------- - // Metrics: Time to First Token (Streaming Only) - // --------------------------------------------------------- - p.recordTimeToFirstToken(ctx, resp, meta, currentAcc, finalModelName) - - if resp.Content != nil { - currentAcc = p.processStreamingChunk(ctx, resp, currentAcc) - } - - // For streaming, we update the span attributes with what we have so far - var fullText string - if currentAcc != nil { - fullText = p.updateStreamingSpanAttributes(span, currentAcc) - } - - // Metrics: Time to Generate (Streaming Only) & Time Per Output Token - p.recordStreamingGenerationMetrics(ctx, resp, meta, currentAcc, finalModelName) - - // If this is the final chunk, add the completion event - if !resp.Partial && currentAcc != nil { - contentJSON, _ := json.Marshal(currentAcc) - span.AddEvent(EventGenAIContentCompletion, trace.WithAttributes( - attribute.String(AttrGenAICompletion, string(contentJSON)), - attribute.String(AttrGenAIOutput, fullText), - )) - - // Add gen_ai.choice event (aligned with Python) - p.addChoiceEvents(span, currentAcc) - } - if !resp.Partial { - // Record Operation Duration and Latency p.recordFinalResponseMetrics(ctx, meta, finalModelName) } return nil, nil } -func (p *adkObservabilityPlugin) recordTimeToFirstToken(ctx agent.CallbackContext, resp *model.LLMResponse, meta *spanMetadata, currentAcc *genai.Content, finalModelName string) { - if resp.Partial && currentAcc == nil && 
resp.Content != nil { - // This is the very first chunk - if !meta.StartTime.IsZero() { - meta.FirstTokenTime = time.Now() - p.storeSpanMetadata(ctx.State(), meta) - - if p.isMetricsEnabled() { - // Record streaming time to first token - latency := time.Since(meta.StartTime).Seconds() - metricAttrs := []attribute.KeyValue{ - attribute.String(AttrGenAISystem, GetModelProvider(context.Context(ctx))), - attribute.String("gen_ai_response_model", finalModelName), - attribute.String("gen_ai_operation_name", "chat"), - attribute.String("gen_ai_operation_type", "llm"), - } - RecordStreamingTimeToFirstToken(context.Context(ctx), latency, metricAttrs...) - } - } else { - log.Warn("didn't find the start time of span", "meta", meta) - } - } -} - -func (p *adkObservabilityPlugin) processStreamingChunk(ctx agent.CallbackContext, resp *model.LLMResponse, currentAcc *genai.Content) *genai.Content { - if currentAcc == nil { - currentAcc = &genai.Content{Role: resp.Content.Role} - if currentAcc.Role == "" { - currentAcc.Role = "model" - } - } - - // If this is the final response, our implementation (like OpenAI) often sends the full content. - // We clear our previous accumulation to avoid duplication in the span attributes. - // We only do this if the final response actually contains content. 
- if !resp.Partial && resp.Content != nil && len(resp.Content.Parts) > 0 { - currentAcc.Parts = nil - } - - // Accumulate parts with merging of adjacent text - for _, part := range resp.Content.Parts { - // If it's a text part, try to merge with the last part if that was also text - if part.Text != "" && len(currentAcc.Parts) > 0 { - lastPart := currentAcc.Parts[len(currentAcc.Parts)-1] - if lastPart.Text != "" && lastPart.FunctionCall == nil && lastPart.FunctionResponse == nil && lastPart.InlineData == nil { - lastPart.Text += part.Text - continue - } - } - - // Otherwise append as a new part - newPart := &genai.Part{} - *newPart = *part - currentAcc.Parts = append(currentAcc.Parts, newPart) - } - _ = ctx.State().Set(stateKeyStreamingOutput, currentAcc) - return currentAcc -} - -func (p *adkObservabilityPlugin) updateStreamingSpanAttributes(span trace.Span, currentAcc *genai.Content) string { - // Set output.value to the cumulative text (parity with python) - var textParts strings.Builder - textParts.Grow(len(currentAcc.Parts) * 4) - for _, p := range currentAcc.Parts { - if p.Text != "" { - textParts.WriteString(p.Text) - } - } - fullText := textParts.String() - span.SetAttributes(attribute.String(AttrGenAIOutput, fullText)) - - // Add output.value for full JSON representation - if contentJSON, err := json.Marshal(currentAcc); err == nil { - span.SetAttributes(attribute.String("output.value", string(contentJSON))) - } - - // Also set the structured GenAI attributes - span.SetAttributes(p.flattenCompletion(currentAcc)...) 
- return fullText -} - -func (p *adkObservabilityPlugin) recordStreamingGenerationMetrics(ctx agent.CallbackContext, resp *model.LLMResponse, meta *spanMetadata, currentAcc *genai.Content, finalModelName string) { - if !resp.Partial && currentAcc != nil { - if !meta.StartTime.IsZero() { - // Time Per Output Token - // Only valid if we have output tokens and we tracked first token time - if p.isMetricsEnabled() { - if meta.CandidateTokens > 0 { - generateDuration := time.Since(meta.StartTime).Seconds() - metricAttrs := []attribute.KeyValue{ - attribute.String(AttrGenAISystem, GetModelProvider(context.Context(ctx))), - attribute.String("gen_ai_response_model", finalModelName), - attribute.String("gen_ai_operation_name", "chat"), - attribute.String("gen_ai_operation_type", "llm"), - } - RecordStreamingTimeToGenerate(context.Context(ctx), generateDuration, metricAttrs...) - - if generateDuration > 0 { - timePerToken := generateDuration / float64(meta.CandidateTokens) - RecordStreamingTimePerOutputToken(context.Context(ctx), timePerToken, metricAttrs...) - } - } - } - } - } -} - func (p *adkObservabilityPlugin) recordFinalResponseMetrics(ctx agent.CallbackContext, meta *spanMetadata, finalModelName string) { if !meta.StartTime.IsZero() { duration := time.Since(meta.StartTime).Seconds() @@ -603,10 +319,9 @@ func (p *adkObservabilityPlugin) recordFinalResponseMetrics(ctx agent.CallbackCo } } -func (p *adkObservabilityPlugin) handleUsage(ctx agent.CallbackContext, span trace.Span, resp *model.LLMResponse, isStream bool, modelName string) { +func (p *adkObservabilityPlugin) handleUsageWithoutSpan(ctx agent.CallbackContext, resp *model.LLMResponse, modelName string) { meta := p.getSpanMetadata(ctx.State()) - // 1. 
Get current call usage currentPrompt := int64(resp.UsageMetadata.PromptTokenCount) currentCandidate := int64(resp.UsageMetadata.CandidatesTokenCount) currentTotal := int64(resp.UsageMetadata.TotalTokenCount) @@ -615,40 +330,11 @@ func (p *adkObservabilityPlugin) handleUsage(ctx agent.CallbackContext, span tra currentTotal = currentPrompt + currentCandidate } - // 2. New session total = previous calls total + current call's (latest) usage - // (Note: in streaming, currentCall usage is cumulative for this call) meta.PromptTokens = meta.PrevPromptTokens + currentPrompt meta.CandidateTokens = meta.PrevCandidateTokens + currentCandidate meta.TotalTokens = meta.PrevTotalTokens + currentTotal - - // 3. Update session-wide totals p.storeSpanMetadata(ctx.State(), meta) - // 4. Set attributes on the current LLM span (only current call's usage) - attrs := make([]attribute.KeyValue, 0, 7) - if currentPrompt > 0 { - attrs = append(attrs, attribute.Int64(AttrGenAIUsageInputTokens, currentPrompt)) - attrs = append(attrs, attribute.Int64(AttrGenAIResponsePromptTokenCount, currentPrompt)) - } - if currentCandidate > 0 { - attrs = append(attrs, attribute.Int64(AttrGenAIUsageOutputTokens, currentCandidate)) - attrs = append(attrs, attribute.Int64(AttrGenAIResponseCandidatesTokenCount, currentCandidate)) - } - if currentTotal > 0 { - attrs = append(attrs, attribute.Int64(AttrGenAIUsageTotalTokens, currentTotal)) - } - - if resp.UsageMetadata != nil { - if resp.UsageMetadata.CachedContentTokenCount > 0 { - attrs = append(attrs, attribute.Int64(AttrGenAIUsageCacheReadInputTokens, int64(resp.UsageMetadata.CachedContentTokenCount))) - } - // Always set cache creation to 0 if not provided, for parity with python - attrs = append(attrs, attribute.Int64(AttrGenAIUsageCacheCreationInputTokens, 0)) - } - - span.SetAttributes(attrs...) 
- - // Record metrics directly from the plugin logic if p.isMetricsEnabled() { metricAttrs := []attribute.KeyValue{ attribute.String(AttrGenAISystem, GetModelProvider(ctx)), @@ -657,259 +343,12 @@ func (p *adkObservabilityPlugin) handleUsage(ctx agent.CallbackContext, span tra attribute.String("gen_ai_operation_type", "llm"), } RecordChatCount(context.Context(ctx), 1, metricAttrs...) - if currentTotal > 0 && (currentPrompt > 0 || currentCandidate > 0) { RecordTokenUsage(context.Context(ctx), currentPrompt, currentCandidate, metricAttrs...) - } } } -func (p *adkObservabilityPlugin) addMessageEvents(span trace.Span, ctx agent.CallbackContext, req *model.LLMRequest) { - // 1. System Message - if req.Config != nil && req.Config.SystemInstruction != nil { - sysContent := "" - for _, part := range req.Config.SystemInstruction.Parts { - if part.Text != "" { - sysContent += part.Text - } - } - if sysContent != "" { - span.AddEvent("gen_ai.system.message", trace.WithAttributes( - attribute.String("role", "system"), - attribute.String("content", sysContent), - )) - } - } - - // 2. User, Tool, Assistant Messages from History - for _, content := range req.Contents { - if content.Role == "user" { - userEventAttrs := []attribute.KeyValue{ - attribute.String("role", "user"), - } - - // Check if it's a tool response (which comes in as 'user' role in Gemini/ADK but logically is tool message) - // Actually ADK structure: - // User inputs -> Role: user - // Tool Outputs -> Role: user (FunctionResponse) or "tool" depending on model? - // Python implementation checks `part.function_response`. 
- - hasToolResponse := false - for _, part := range content.Parts { - if part.FunctionResponse != nil { - hasToolResponse = true - // Emit separate event for each tool response - span.AddEvent("gen_ai.tool.message", trace.WithAttributes( - attribute.String("role", "tool"), - attribute.String("id", part.FunctionResponse.ID), - attribute.String("content", safeMarshal(part.FunctionResponse.Response)), - )) - } - } - - if hasToolResponse { - continue - } - - // Normal User Message - for i, part := range content.Parts { - if part.Text != "" { - if len(content.Parts) == 1 { - userEventAttrs = append(userEventAttrs, attribute.String("content", sanitizeUTF8(part.Text))) - } else { - userEventAttrs = append(userEventAttrs, attribute.String("parts."+strconv.Itoa(i)+".type", "text")) - userEventAttrs = append(userEventAttrs, attribute.String("parts."+strconv.Itoa(i)+".text", sanitizeUTF8(part.Text))) - } - } - if part.InlineData != nil && len(part.InlineData.Data) > 0 { - // Image/Blob handling - prefix := "parts." 
+ strconv.Itoa(i) - if len(content.Parts) == 1 { - prefix = "parts.0" - } - userEventAttrs = append(userEventAttrs, attribute.String(prefix+".type", "image_url")) - // MIME type or display name mapping - userEventAttrs = append(userEventAttrs, attribute.String(prefix+".image_url.url", part.InlineData.MIMEType)) - } - } - span.AddEvent("gen_ai.user.message", trace.WithAttributes(userEventAttrs...)) - - } else if content.Role == "model" { - assistantEventAttrs := []attribute.KeyValue{ - attribute.String("role", "assistant"), - } - for i, part := range content.Parts { - if part.Text != "" { - assistantEventAttrs = append(assistantEventAttrs, attribute.String("parts."+strconv.Itoa(i)+".type", "text")) - assistantEventAttrs = append(assistantEventAttrs, attribute.String("parts."+strconv.Itoa(i)+".text", sanitizeUTF8(part.Text))) - } - if part.FunctionCall != nil { - // Tool Calls - prefix := "tool_calls.0" // Assuming single tool call per part or simplifying - assistantEventAttrs = append(assistantEventAttrs, attribute.String(prefix+".id", part.FunctionCall.ID)) - assistantEventAttrs = append(assistantEventAttrs, attribute.String(prefix+".type", "function")) - assistantEventAttrs = append(assistantEventAttrs, attribute.String(prefix+".function.name", part.FunctionCall.Name)) - assistantEventAttrs = append(assistantEventAttrs, attribute.String(prefix+".function.arguments", safeMarshal(part.FunctionCall.Args))) - } - } - span.AddEvent("gen_ai.assistant.message", trace.WithAttributes(assistantEventAttrs...)) - } - } -} - -func (p *adkObservabilityPlugin) addChoiceEvents(span trace.Span, content *genai.Content) { - for i, part := range content.Parts { - attrs := make([]attribute.KeyValue, 0, 2) - if part.Text != "" { - attrs = append(attrs, attribute.String("message.parts."+strconv.Itoa(i)+".type", "text")) - attrs = append(attrs, attribute.String("message.parts."+strconv.Itoa(i)+".text", sanitizeUTF8(part.Text))) - } - if len(attrs) > 0 { - span.AddEvent("gen_ai.choice", 
trace.WithAttributes(attrs...)) - } - } -} - -// extractMessages converts ADK model.LLMRequest contents into a JSON-compatible message list. -func (p *adkObservabilityPlugin) extractMessages(req *model.LLMRequest) []map[string]any { - var messages []map[string]any - for _, content := range req.Contents { - role := content.Role - if role == "model" { - role = "assistant" - } - - msg := map[string]any{ - "role": role, - } - - var textParts []string - var toolCalls []map[string]any - var toolResponses []map[string]any - - for _, part := range content.Parts { - if part.Text != "" { - textParts = append(textParts, sanitizeUTF8(part.Text)) - } - if part.FunctionCall != nil { - toolCalls = append(toolCalls, map[string]any{ - "id": part.FunctionCall.ID, - "type": "function", - "function": map[string]any{ - "name": part.FunctionCall.Name, - "arguments": safeMarshal(part.FunctionCall.Args), - }, - }) - } - if part.FunctionResponse != nil { - toolResponses = append(toolResponses, map[string]any{ - "id": part.FunctionResponse.ID, - "name": part.FunctionResponse.Name, - "content": safeMarshal(part.FunctionResponse.Response), - }) - } - } - - if len(textParts) > 0 { - msg["content"] = strings.Join(textParts, "") - } - if len(toolCalls) > 0 { - msg["tool_calls"] = toolCalls - } - if len(toolResponses) > 0 { - // In standard GenAI, tool responses are often represented separate messages or differently. - // Alignment with veadk-python usually means following their structure. - msg["tool_responses"] = toolResponses - } - - messages = append(messages, msg) - } - return messages -} - -func (p *adkObservabilityPlugin) flattenPrompt(messages []map[string]any) []attribute.KeyValue { - var attrs []attribute.KeyValue - idx := 0 - for _, msg := range messages { - // In Python, each piece of content/part increments the index. - // Since we already merged text parts in extractMessages, we just process each message here. 
- // If we wanted exact parity for multi-part messages, we'd need to change extractMessages. - // For now, this is a good approximation that matches the role/content flat structure. - prefix := "gen_ai.prompt." + strconv.Itoa(idx) - if role, ok := msg["role"].(string); ok { - attrs = append(attrs, attribute.String(prefix+".role", role)) - } - if content, ok := msg["content"].(string); ok { - attrs = append(attrs, attribute.String(prefix+".content", content)) - } - - if toolCalls, ok := msg["tool_calls"].([]map[string]any); ok { - for j, tc := range toolCalls { - tcPrefix := prefix + ".tool_calls." + strconv.Itoa(j) - if id, ok := tc["id"].(string); ok { - attrs = append(attrs, attribute.String(tcPrefix+".id", id)) - } - if t, ok := tc["type"].(string); ok { - attrs = append(attrs, attribute.String(tcPrefix+".type", t)) - } - if fn, ok := tc["function"].(map[string]any); ok { - if name, ok := fn["name"].(string); ok { - attrs = append(attrs, attribute.String(tcPrefix+".function.name", name)) - } - if args, ok := fn["arguments"].(string); ok { - attrs = append(attrs, attribute.String(tcPrefix+".function.arguments", args)) - } - } - } - } - - if toolResponses, ok := msg["tool_responses"].([]map[string]any); ok { - for j, tr := range toolResponses { - trPrefix := prefix + ".tool_responses." + strconv.Itoa(j) - if id, ok := tr["id"].(string); ok { - attrs = append(attrs, attribute.String(trPrefix+".id", id)) - } - if name, ok := tr["name"].(string); ok { - attrs = append(attrs, attribute.String(trPrefix+".name", name)) - } - if content, ok := tr["content"].(string); ok { - attrs = append(attrs, attribute.String(trPrefix+".content", content)) - } - } - } - idx++ - } - return attrs -} - -func (p *adkObservabilityPlugin) flattenCompletion(content *genai.Content) []attribute.KeyValue { - var attrs []attribute.KeyValue - - role := content.Role - if role == "model" { - role = "assistant" - } - - for idx, part := range content.Parts { - prefix := "gen_ai.completion." 
+ strconv.Itoa(idx) - attrs = append(attrs, attribute.String(prefix+".role", role)) - - if part.Text != "" { - attrs = append(attrs, attribute.String(prefix+".content", sanitizeUTF8(part.Text))) - } - if part.FunctionCall != nil { - tcPrefix := prefix + ".tool_calls.0" - attrs = append(attrs, attribute.String(tcPrefix+".id", part.FunctionCall.ID)) - attrs = append(attrs, attribute.String(tcPrefix+".type", "function")) - attrs = append(attrs, attribute.String(tcPrefix+".function.name", part.FunctionCall.Name)) - attrs = append(attrs, attribute.String(tcPrefix+".function.arguments", safeMarshal(part.FunctionCall.Args))) - } - } - - return attrs -} - // BeforeTool is called before a tool is executed. func (p *adkObservabilityPlugin) BeforeTool(ctx tool.Context, tool tool.Tool, args map[string]any) (map[string]any, error) { log.Debug("BeforeTool", @@ -974,48 +413,6 @@ func (p *adkObservabilityPlugin) AfterTool(ctx tool.Context, tool tool.Tool, arg func (p *adkObservabilityPlugin) BeforeAgent(ctx agent.CallbackContext) (*genai.Content, error) { log.Debug("BeforeAgent", "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) - agentName := ctx.AgentName() - if agentName == "" { - agentName = FallbackAgentName - } - - // 1. Get the parent context from state to maintain hierarchy - parentCtx := context.Context(ctx) - if ictx, _ := ctx.State().Get(stateKeyInvocationCtx); ictx != nil { - parentCtx = ictx.(context.Context) - } - - // 2. Start the 'invoke_agent' span manually. - // Since we can't easily wrap the Agent interface due to internal methods, - // we use the plugin to start our span. - spanName := SpanInvokeAgent + " " + agentName - newCtx, span := p.tracer.Start(parentCtx, spanName) - - // Register internal ADK's agent span ID -> our veadk agent span context. 
- adkSpan := trace.SpanFromContext(context.Context(ctx)) - if adkSpan.SpanContext().IsValid() { - GetRegistry().RegisterAgentMapping(adkSpan.SpanContext().SpanID(), adkSpan.SpanContext().TraceID(), span.SpanContext()) - } - - // 3. Store in state for AfterAgent and children - _ = ctx.State().Set(stateKeyInvokeAgentSpan, span) - _ = ctx.State().Set(stateKeyInvokeAgentCtx, newCtx) - - // 4. Set attributes - setCommonAttributesFromCallback(ctx, span) - setWorkflowAttributes(span) - setAgentAttributes(span, agentName) - - // Capture input if available (propagated from BeforeRun via state or context?) - // Note: BeforeRun captures UserContent, but for nested agents, input might be passed differently. - // For now, if UserContent is available in this context, log it. - if userContent := ctx.UserContent(); userContent != nil { - if jsonIn, err := json.Marshal(userContent); err == nil { - val := string(jsonIn) - span.SetAttributes(attribute.String(AttrGenAIInput, val)) - } - } - return nil, nil } @@ -1023,20 +420,6 @@ func (p *adkObservabilityPlugin) BeforeAgent(ctx agent.CallbackContext) (*genai. func (p *adkObservabilityPlugin) AfterAgent(ctx agent.CallbackContext) (*genai.Content, error) { log.Debug("AfterAgent", "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) - // 1. 
End the span - if s, _ := ctx.State().Get(stateKeyInvokeAgentSpan); s != nil { - span := s.(trace.Span) - if span.IsRecording() { - // Try to capture output if available in state (propagated from AfterRun or internal execution) - if cached, _ := ctx.State().Get(stateKeyStreamingOutput); cached != nil { - if jsonOut, err := json.Marshal(cached); err == nil { - val := string(jsonOut) - span.SetAttributes(attribute.String(AttrGenAIOutput, val)) - } - } - span.End() - } - } return nil, nil } @@ -1052,45 +435,17 @@ func (p *adkObservabilityPlugin) storeSpanMetadata(state session.State, meta *sp _ = state.Set(stateKeyMetadata, meta) } -// sanitizeUTF8 removes or replaces invalid UTF-8 characters from a string -func sanitizeUTF8(s string) string { - // If the string is already valid UTF-8, return it as is - if len(s) == 0 { - return s - } - - // Replace invalid UTF-8 sequences with Unicode replacement character - return string([]rune(s)) -} - -func safeMarshal(v any) string { - if v == nil { - return "" - } - b, err := json.Marshal(v) - if err != nil { - return "" - } - - return string(b) -} - const ( - stateKeyInvocationSpan = "veadk.observability.invocation_span" - stateKeyInvocationCtx = "veadk.observability.invocation_ctx" - stateKeyInvokeAgentCtx = "veadk.observability.invoke_agent_ctx" - stateKeyInvokeAgentSpan = "veadk.observability.invoke_agent_span" + stateKeyInvocationSpan = "veadk.observability.invocation_span" stateKeyMetadata = "veadk.observability.metadata" stateKeyStreamingOutput = "veadk.observability.streaming_output" - stateKeyStreamingSpan = "veadk.observability.streaming_span" ) // spanMetadata groups various observational data points in a single structure // to keep the ADK State clean. 
type spanMetadata struct { StartTime time.Time - FirstTokenTime time.Time PromptTokens int64 CandidateTokens int64 TotalTokens int64 diff --git a/observability/span_processor.go b/observability/span_processor.go new file mode 100644 index 0000000..56c02b0 --- /dev/null +++ b/observability/span_processor.go @@ -0,0 +1,135 @@ +package observability + +import ( + "context" + "strings" + + "go.opentelemetry.io/otel/attribute" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "google.golang.org/adk/agent" +) + +type veadkSpanProcessor struct{} + +func NewVeADKSpanProcessor() sdktrace.SpanProcessor { + return &veadkSpanProcessor{} +} + +func (p *veadkSpanProcessor) OnStart(ctx context.Context, span sdktrace.ReadWriteSpan) { + p.setCommonAttributes(ctx, span) + p.setSemanticAttributes(ctx, span) +} + +func (p *veadkSpanProcessor) OnEnd(sdktrace.ReadOnlySpan) {} + +func (p *veadkSpanProcessor) Shutdown(context.Context) error { return nil } + +func (p *veadkSpanProcessor) ForceFlush(context.Context) error { return nil } + +func (p *veadkSpanProcessor) setCommonAttributes(ctx context.Context, span sdktrace.ReadWriteSpan) { + sessionID := FallbackSessionID + userID := FallbackUserID + appName := FallbackAppName + invocationID := FallbackInvocationID + agentName := FallbackAgentName + + if ivc, ok := ctx.(agent.InvocationContext); ok { + if s := ivc.Session(); s != nil { + if s.ID() != "" { + sessionID = s.ID() + } + if s.UserID() != "" { + userID = s.UserID() + } + if s.AppName() != "" { + appName = s.AppName() + } + } + if ivc.InvocationID() != "" { + invocationID = ivc.InvocationID() + } + if ivc.Agent() != nil && ivc.Agent().Name() != "" { + agentName = ivc.Agent().Name() + } + } + + if cctx, ok := ctx.(agent.CallbackContext); ok { + if cctx.SessionID() != "" { + sessionID = cctx.SessionID() + } + if cctx.UserID() != "" { + userID = cctx.UserID() + } + if cctx.AppName() != "" { + appName = cctx.AppName() + } + if cctx.InvocationID() != "" { + invocationID = 
cctx.InvocationID() + } + if cctx.AgentName() != "" { + agentName = cctx.AgentName() + } + } + + span.SetAttributes( + attribute.String(AttrCozeloopReportSource, DefaultCozeLoopReportSource), + attribute.String(AttrGenAISystem, GetModelProvider(ctx)), + attribute.String(AttrGenAISystemVersion, Version), + attribute.String(AttrInstrumentation, Version), + attribute.String(AttrCozeloopCallType, GetCallType(ctx)), + attribute.String(AttrGenAISessionID, sessionID), + attribute.String(AttrSessionID, sessionID), + attribute.String(AttrGenAIUserID, userID), + attribute.String(AttrUserID, userID), + attribute.String(AttrGenAIAppName, appName), + attribute.String(AttrAppNameUnderline, appName), + attribute.String(AttrAppNameDot, appName), + attribute.String(AttrGenAIInvocationID, invocationID), + attribute.String(AttrInvocationID, invocationID), + attribute.String(AttrGenAIAgentName, agentName), + attribute.String(AttrAgentName, agentName), + attribute.String(AttrAgentNameDot, agentName), + ) +} + +func (p *veadkSpanProcessor) setSemanticAttributes(ctx context.Context, span sdktrace.ReadWriteSpan) { + name := span.Name() + + switch { + case name == SpanInvocation: + span.SetAttributes( + attribute.String(AttrGenAISpanKind, SpanKindWorkflow), + attribute.String(AttrGenAIOperationName, "chain"), + ) + case strings.HasPrefix(name, "invoke_agent "): + agentName := strings.TrimPrefix(name, "invoke_agent ") + if agentName == "" { + agentName = FallbackAgentName + } + span.SetAttributes( + attribute.String(AttrGenAISpanKind, SpanKindWorkflow), + attribute.String(AttrGenAIOperationName, "chain"), + attribute.String(AttrGenAIAgentName, agentName), + attribute.String(AttrAgentName, agentName), + attribute.String(AttrAgentNameDot, agentName), + ) + case strings.HasPrefix(name, "generate_content ") || name == SpanCallLLM: + span.SetAttributes( + attribute.String(AttrGenAISpanKind, SpanKindLLM), + attribute.String(AttrGenAIOperationName, "chat"), + attribute.String(AttrGenAIRequestType, 
"chat"), + ) + case strings.HasPrefix(name, "execute_tool "): + toolName := strings.TrimPrefix(name, "execute_tool ") + if toolName == "" { + toolName = "" + } + span.SetAttributes( + attribute.String(AttrGenAISpanKind, SpanKindTool), + attribute.String(AttrGenAIOperationName, "execute_tool"), + attribute.String(AttrGenAIToolName, toolName), + ) + } + + _ = ctx +} diff --git a/observability/translator.go b/observability/translator.go index d7d2781..0e72099 100644 --- a/observability/translator.go +++ b/observability/translator.go @@ -165,6 +165,16 @@ func (p *translatedSpan) processAttributes(attrs []attribute.KeyValue, existingK for _, kv := range attrs { key := string(kv.Key) + if key == AttrGenAIOperationName { + op := kv.Value.AsString() + switch op { + case "generate_content": + kv = attribute.String(AttrGenAIOperationName, "chat") + case "invoke_agent": + kv = attribute.String(AttrGenAIOperationName, "chain") + } + } + // 1. Map ADK internal attributes if not already present in standard form if strings.HasPrefix(key, "gcp.vertex.agent.") { targetKey, ok := ADKAttributeKeyMap[key] @@ -192,6 +202,14 @@ func (p *translatedSpan) processAttributes(attrs []attribute.KeyValue, existingK return newAttrs } +func (p *translatedSpan) Name() string { + name := p.ReadOnlySpan.Name() + if strings.HasPrefix(name, "generate_content ") || name == "generate_content" { + return SpanCallLLM + } + return name +} + func (p *translatedSpan) reconstructToolInput(toolName, toolDesc, toolArgs string) []attribute.KeyValue { var paramsMap map[string]any if err := json.Unmarshal([]byte(toolArgs), ¶msMap); err == nil { From a007e9592da4c67067e9373f1619ebbc3ad91872 Mon Sep 17 00:00:00 2001 From: shunjiazhu Date: Tue, 17 Feb 2026 01:49:30 +0800 Subject: [PATCH 2/8] upgrade adk-go to v0.5.x --- apps/agentkit_server_app/app.go | 13 +- apps/basic_app.go | 16 +- configs/config_test.go | 6 - configs/configs.go | 2 +- configs/observability.go | 26 +- examples/observability/agent.go | 1 + 
observability/README.md | 4 +- observability/README_zh.md | 4 +- observability/adk_option.go | 23 ++ observability/attributes.go | 68 +--- observability/attributes_test.go | 26 +- observability/constant.go | 45 +++ observability/initialize.go | 8 - observability/initialize_test.go | 44 +-- observability/plugin.go | 297 +++++++++--------- observability/plugin_test.go | 63 ++++ observability/registry.go | 159 ++++------ observability/serialization.go | 184 +++++++++++ observability/serialization_test.go | 61 ++++ observability/span_processor.go | 160 ++++++++-- observability/span_processor_test.go | 49 +++ observability/translator.go | 444 ++++++++++++++++++--------- observability/translator_test.go | 42 +++ 23 files changed, 1147 insertions(+), 598 deletions(-) create mode 100644 observability/adk_option.go create mode 100644 observability/plugin_test.go create mode 100644 observability/serialization.go create mode 100644 observability/serialization_test.go create mode 100644 observability/span_processor_test.go create mode 100644 observability/translator_test.go diff --git a/apps/agentkit_server_app/app.go b/apps/agentkit_server_app/app.go index b81d208..5e4a1ab 100644 --- a/apps/agentkit_server_app/app.go +++ b/apps/agentkit_server_app/app.go @@ -63,12 +63,13 @@ func (a *agentkitServerApp) SetupRouters(router *mux.Router, config *apps.RunCon } launchConfig := &launcher.Config{ - SessionService: config.SessionService, - ArtifactService: config.ArtifactService, - MemoryService: config.MemoryService, - AgentLoader: config.AgentLoader, - A2AOptions: config.A2AOptions, - PluginConfig: config.PluginConfig, + SessionService: config.SessionService, + ArtifactService: config.ArtifactService, + MemoryService: config.MemoryService, + AgentLoader: config.AgentLoader, + A2AOptions: config.A2AOptions, + PluginConfig: config.PluginConfig, + TelemetryOptions: config.TelemetryOptions, } // setup webui routers diff --git a/apps/basic_app.go b/apps/basic_app.go index 
43c50e7..dbe7768 100644 --- a/apps/basic_app.go +++ b/apps/basic_app.go @@ -35,15 +35,17 @@ import ( "google.golang.org/adk/plugin" "google.golang.org/adk/runner" "google.golang.org/adk/session" + "google.golang.org/adk/telemetry" ) type RunConfig struct { - SessionService session.Service - ArtifactService artifact.Service - MemoryService memory.Service - AgentLoader agent.Loader - A2AOptions []a2asrv.RequestHandlerOption - PluginConfig runner.PluginConfig + SessionService session.Service + ArtifactService artifact.Service + MemoryService memory.Service + AgentLoader agent.Loader + A2AOptions []a2asrv.RequestHandlerOption + PluginConfig runner.PluginConfig + TelemetryOptions []telemetry.Option } func (cfg *RunConfig) AppendObservability() { @@ -62,6 +64,8 @@ func (cfg *RunConfig) AppendObservability() { cfg.PluginConfig.Plugins = append(cfg.PluginConfig.Plugins, observabilityPlugin) log.Info("Plugin configured") } + + cfg.TelemetryOptions = append(cfg.TelemetryOptions, observability.ADKTelemetryOptions()...) 
} type ApiConfig struct { diff --git a/configs/config_test.go b/configs/config_test.go index 0fea85c..b95b576 100644 --- a/configs/config_test.go +++ b/configs/config_test.go @@ -100,7 +100,6 @@ opentelemetry: endpoint: "http://apmplus-example.com" api_key: "test-key" service_name: "test-service" - enable_global_tracer: true ` var config ObservabilityConfig err := yaml.Unmarshal([]byte(yamlData), &config) @@ -111,18 +110,14 @@ opentelemetry: assert.Equal(t, "http://apmplus-example.com", config.OpenTelemetry.ApmPlus.Endpoint) assert.Equal(t, "test-key", config.OpenTelemetry.ApmPlus.APIKey) assert.Equal(t, "test-service", config.OpenTelemetry.ApmPlus.ServiceName) - assert.True(t, config.OpenTelemetry.EnableGlobalProvider) assert.Equal(t, "test-service", config.OpenTelemetry.ApmPlus.ServiceName) - assert.True(t, config.OpenTelemetry.EnableGlobalProvider) } func TestObservabilityConfig_EnvMapping(t *testing.T) { os.Setenv("OBSERVABILITY_OPENTELEMETRY_APMPLUS_ENDPOINT", "http://env-endpoint") - os.Setenv("OBSERVABILITY_OPENTELEMETRY_ENABLE_GLOBAL_PROVIDER", "true") defer func() { os.Unsetenv("OBSERVABILITY_OPENTELEMETRY_APMPLUS_ENDPOINT") - os.Unsetenv("OBSERVABILITY_OPENTELEMETRY_ENABLE_GLOBAL_PROVIDER") }() config := &ObservabilityConfig{} @@ -131,7 +126,6 @@ func TestObservabilityConfig_EnvMapping(t *testing.T) { assert.NotNil(t, config.OpenTelemetry) assert.NotNil(t, config.OpenTelemetry.ApmPlus) assert.Equal(t, "http://env-endpoint", config.OpenTelemetry.ApmPlus.Endpoint) - assert.True(t, config.OpenTelemetry.EnableGlobalProvider) } func TestObservabilityConfig_Priority(t *testing.T) { diff --git a/configs/configs.go b/configs/configs.go index 2dd38f5..a259c9e 100644 --- a/configs/configs.go +++ b/configs/configs.go @@ -91,7 +91,7 @@ func SetupVeADKConfig() error { }, Observability: &ObservabilityConfig{ OpenTelemetry: &OpenTelemetryConfig{ - EnableGlobalProvider: true, // adk-go main path: use global provider by default + // traces are enabled automatically when 
at least one trace exporter is configured }, }, } diff --git a/configs/observability.go b/configs/observability.go index 4fbdfe3..a00de1b 100644 --- a/configs/observability.go +++ b/configs/observability.go @@ -22,9 +22,8 @@ import ( const ( // Global - EnvOtelServiceName = "OTEL_SERVICE_NAME" - EnvObservabilityEnableGlobalProvider = "OBSERVABILITY_OPENTELEMETRY_ENABLE_GLOBAL_PROVIDER" - EnvObservabilityEnableMetrics = "OBSERVABILITY_OPENTELEMETRY_ENABLE_METRICS" + EnvOtelServiceName = "OTEL_SERVICE_NAME" + EnvObservabilityEnableMetrics = "OBSERVABILITY_OPENTELEMETRY_ENABLE_METRICS" // APMPlus EnvObservabilityOpenTelemetryApmPlusProtocol = "OBSERVABILITY_OPENTELEMETRY_APMPLUS_PROTOCOL" @@ -58,8 +57,7 @@ type ObservabilityConfig struct { } type OpenTelemetryConfig struct { - EnableGlobalProvider bool `yaml:"enable_global_tracer"` - EnableMetrics *bool `yaml:"enable_metrics"` + EnableMetrics *bool `yaml:"enable_metrics"` File *FileConfig `yaml:"file"` Stdout *StdoutConfig `yaml:"stdout"` @@ -229,11 +227,6 @@ func (c *ObservabilityConfig) MapEnvToConfig() { ot.Stdout.Enable = v == "true" } - // Global Tracer - if v := utils.GetEnvWithDefault(EnvObservabilityEnableGlobalProvider); v != "" { - ot.EnableGlobalProvider = v == "true" - } - // Meter Provider if v := utils.GetEnvWithDefault(EnvObservabilityEnableMetrics); v != "" { if ot.EnableMetrics == nil { @@ -258,13 +251,12 @@ func (c *OpenTelemetryConfig) Clone() *OpenTelemetryConfig { } return &OpenTelemetryConfig{ - EnableGlobalProvider: c.EnableGlobalProvider, - EnableMetrics: c.EnableMetrics, - ApmPlus: c.ApmPlus.Clone(), - CozeLoop: c.CozeLoop.Clone(), - TLS: c.TLS.Clone(), - File: c.File.Clone(), - Stdout: c.Stdout.Clone(), + EnableMetrics: c.EnableMetrics, + ApmPlus: c.ApmPlus.Clone(), + CozeLoop: c.CozeLoop.Clone(), + TLS: c.TLS.Clone(), + File: c.File.Clone(), + Stdout: c.Stdout.Clone(), } } diff --git a/examples/observability/agent.go b/examples/observability/agent.go index dc0a60b..bafcf81 100644 --- 
a/examples/observability/agent.go +++ b/examples/observability/agent.go @@ -50,6 +50,7 @@ func main() { PluginConfig: runner.PluginConfig{ Plugins: []*plugin.Plugin{observability.NewPlugin()}, }, + TelemetryOptions: observability.ADKTelemetryOptions(), } l := full.NewLauncher() diff --git a/observability/README.md b/observability/README.md index 2086bed..2f0895d 100644 --- a/observability/README.md +++ b/observability/README.md @@ -62,7 +62,6 @@ Add an `observability` section to your `config.yaml`: ```yaml observability: opentelemetry: - enable_global_tracer: true apmplus: endpoint: "https://apmplus-cn-beijing.volces.com:4318" api_key: "YOUR_APMPLUS_API_KEY" @@ -75,9 +74,10 @@ All settings can be overridden via environment variables: - `OBSERVABILITY_OPENTELEMETRY_COZELOOP_API_KEY` - `OBSERVABILITY_OPENTELEMETRY_APMPLUS_API_KEY` -- `OBSERVABILITY_OPENTELEMETRY_ENABLE_GLOBAL_PROVIDER` (default: true) - `VEADK_MODEL_PROVIDER` - Set model provider +Trace exporting is enabled automatically when at least one trace exporter is configured. 
+ ## Usage diff --git a/observability/README_zh.md b/observability/README_zh.md index fcca5a3..fafc1f4 100644 --- a/observability/README_zh.md +++ b/observability/README_zh.md @@ -62,7 +62,6 @@ VeADK Go 实现了以下 Span 属性类别,详见 [Python ADK Span 属性文 ```yaml observability: opentelemetry: - enable_global_tracer: true apmplus: endpoint: "https://apmplus-cn-beijing.volces.com:4318" api_key: "YOUR_APMPLUS_API_KEY" @@ -75,9 +74,10 @@ observability: - `OBSERVABILITY_OPENTELEMETRY_COZELOOP_API_KEY` - `OBSERVABILITY_OPENTELEMETRY_APMPLUS_API_KEY` -- `OBSERVABILITY_OPENTELEMETRY_ENABLE_GLOBAL_PROVIDER` (默认: true) - `VEADK_MODEL_PROVIDER` - 设置模型提供商 +只要配置了至少一个 trace exporter,就会自动启用 trace 导出。 + ## 使用方法 diff --git a/observability/adk_option.go b/observability/adk_option.go new file mode 100644 index 0000000..c166657 --- /dev/null +++ b/observability/adk_option.go @@ -0,0 +1,23 @@ +package observability + +import ( + "go.opentelemetry.io/otel" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + adktelemetry "google.golang.org/adk/telemetry" +) + +// ADKTelemetryOptions returns launcher telemetry options that force ADK to reuse +// the current global SDK TracerProvider initialized by veadk-go observability. +// +// This prevents launcher telemetry initialization from replacing the global +// provider with a different instance and keeps ADK spans and veadk plugin spans +// in the same pipeline. 
+func ADKTelemetryOptions() []adktelemetry.Option { + tc := otel.GetTracerProvider() + sdkTP, ok := tc.(*sdktrace.TracerProvider) + if !ok || sdkTP == nil { + return nil + } + + return []adktelemetry.Option{adktelemetry.WithTracerProvider(sdkTP)} +} diff --git a/observability/attributes.go b/observability/attributes.go index 6dabac9..9fd1f90 100644 --- a/observability/attributes.go +++ b/observability/attributes.go @@ -47,21 +47,6 @@ func setCommonAttributesFromInvocation(ctx agent.InvocationContext, span trace.S ) } -// setCommonAttributesFromCallback enriches the span with identity attributes from -// agent.CallbackContext, following ADK canonical sources. -func setCommonAttributesFromCallback(ctx agent.CallbackContext, span trace.Span) { - setCommonAttributesWithIdentity( - context.Context(ctx), - span, - ctx.SessionID(), - ctx.UserID(), - ctx.AppName(), - - ctx.InvocationID(), - ) - setDynamicAttribute(span, AttrGenAIAgentName, ctx.AgentName(), FallbackAgentName, AttrAgentName, AttrAgentNameDot) -} - func setCommonAttributesWithIdentity(ctx context.Context, span trace.Span, sessionID, userID, appName, invocationID string) { // 1. Fixed attributes span.SetAttributes(attribute.String(AttrCozeloopReportSource, DefaultCozeLoopReportSource)) @@ -90,37 +75,11 @@ func setDynamicAttribute(span trace.Span, key string, val string, fallback strin } } -// setLLMAttributes sets standard GenAI attributes for LLM spans. -func setLLMAttributes(span trace.Span) { - span.SetAttributes( - attribute.String(AttrGenAISpanKind, SpanKindLLM), - attribute.String(AttrGenAIOperationName, "chat"), - ) -} - -// setToolAttributes sets standard GenAI attributes for Tool spans. -func setToolAttributes(span trace.Span, name string) { - span.SetAttributes( - attribute.String(AttrGenAISpanKind, SpanKindTool), - attribute.String(AttrGenAIOperationName, "execute_tool"), - attribute.String(AttrGenAIToolName, name), - ) -} - -// setAgentAttributes sets standard GenAI attributes for Agent spans. 
-func setAgentAttributes(span trace.Span, name string) { - span.SetAttributes( - attribute.String(AttrGenAIAgentName, name), - attribute.String(AttrAgentName, name), // Alias: agent_name - attribute.String(AttrAgentNameDot, name), // Alias: agent.name - ) -} - // setWorkflowAttributes sets standard GenAI attributes for Workflow/Root spans. func setWorkflowAttributes(span trace.Span) { span.SetAttributes( attribute.String(AttrGenAISpanKind, SpanKindWorkflow), - attribute.String(AttrGenAIOperationName, "chain"), + attribute.String(AttrGenAIOperationName, OperationNameChain), ) } @@ -177,31 +136,6 @@ func getFromGlobalConfig(key contextKey) string { return "" } -func getServiceName(cfg *configs.OpenTelemetryConfig) string { - if serviceFromEnv := os.Getenv("OTEL_SERVICE_NAME"); serviceFromEnv != "" { - return serviceFromEnv - } - - if cfg.ApmPlus != nil { - if cfg.ApmPlus.ServiceName != "" { - return cfg.ApmPlus.ServiceName - } - } - - if cfg.CozeLoop != nil { - if cfg.CozeLoop.ServiceName != "" { - return cfg.CozeLoop.ServiceName - } - } - - if cfg.TLS != nil { - if cfg.TLS.ServiceName != "" { - return cfg.TLS.ServiceName - } - } - return "" -} - // OTEL_RESOURCE_ATTRIBUTES="instance.id=123456,apmplus.business_carrier=agentkit_runtime" func checkAgentKitRuntime() bool { otelAttrStr := os.Getenv("OTEL_RESOURCE_ATTRIBUTES") diff --git a/observability/attributes_test.go b/observability/attributes_test.go index 3f7953a..3ffa8ee 100644 --- a/observability/attributes_test.go +++ b/observability/attributes_test.go @@ -43,26 +43,18 @@ func (m *MockSpan) SetAttributes(kv ...attribute.KeyValue) { } func TestSetSpecificAttributes(t *testing.T) { - t.Run("LLM", func(t *testing.T) { + t.Run("Workflow", func(t *testing.T) { span := NewMockSpan() - setLLMAttributes(span) - assert.Equal(t, SpanKindLLM, span.Attributes[attribute.Key(AttrGenAISpanKind)].AsString()) - assert.Equal(t, "chat", span.Attributes[attribute.Key(AttrGenAIOperationName)].AsString()) + 
setWorkflowAttributes(span) + assert.Equal(t, SpanKindWorkflow, span.Attributes[attribute.Key(AttrGenAISpanKind)].AsString()) + assert.Equal(t, OperationNameChain, span.Attributes[attribute.Key(AttrGenAIOperationName)].AsString()) }) - t.Run("Tool", func(t *testing.T) { + t.Run("DynamicAttributeWithFallbackAndAliases", func(t *testing.T) { span := NewMockSpan() - setToolAttributes(span, "my-tool") - assert.Equal(t, SpanKindTool, span.Attributes[attribute.Key(AttrGenAISpanKind)].AsString()) - assert.Equal(t, "execute_tool", span.Attributes[attribute.Key(AttrGenAIOperationName)].AsString()) - assert.Equal(t, "my-tool", span.Attributes[attribute.Key(AttrGenAIToolName)].AsString()) - }) - - t.Run("Agent", func(t *testing.T) { - span := NewMockSpan() - setAgentAttributes(span, "my-agent") - assert.Equal(t, "my-agent", span.Attributes[attribute.Key(AttrGenAIAgentName)].AsString()) - assert.Equal(t, "my-agent", span.Attributes[attribute.Key(AttrAgentName)].AsString()) - assert.Equal(t, "my-agent", span.Attributes[attribute.Key(AttrAgentNameDot)].AsString()) + setDynamicAttribute(span, AttrGenAIAgentName, "", FallbackAgentName, AttrAgentName, AttrAgentNameDot) + assert.Equal(t, FallbackAgentName, span.Attributes[attribute.Key(AttrGenAIAgentName)].AsString()) + assert.Equal(t, FallbackAgentName, span.Attributes[attribute.Key(AttrAgentName)].AsString()) + assert.Equal(t, FallbackAgentName, span.Attributes[attribute.Key(AttrAgentNameDot)].AsString()) }) } diff --git a/observability/constant.go b/observability/constant.go index 5193fe7..903b7ed 100644 --- a/observability/constant.go +++ b/observability/constant.go @@ -53,6 +53,10 @@ const ( SpanInvokeAgent = "invoke_agent" // Will be suffixed with name in code SpanCallLLM = "call_llm" SpanExecuteTool = "execute_tool" // Will be suffixed with name in code + + SpanPrefixInvokeAgent = SpanInvokeAgent + " " + SpanPrefixGenerateContent = OperationNameGenerateContent + " " + SpanPrefixExecuteTool = SpanExecuteTool + " " ) // Metric 
names @@ -95,6 +99,13 @@ const ( AttrCozeloopReportSource = "cozeloop.report.source" // Fixed value: veadk AttrCozeloopCallType = "cozeloop.call_type" // CozeLoop call type + ADKAttributePrefix = "gcp.vertex.agent." + ADKInstrumentationName = "gcp.vertex.agent" + ADKLegacyScopeName = "veadk" + ADKModelProvider = "gcp.vertex.agent" + OpenInferenceScopeName = "openinference.instrumentation.veadk" + ModelProviderVolcengine = "volcengine" + // Environment Variable Keys for Zero-Config Attributes EnvModelProvider = "VEADK_MODEL_PROVIDER" EnvAppName = "VEADK_APP_NAME" @@ -116,6 +127,31 @@ const ( SpanKindTool = "tool" ) +// Operation names and types +const ( + OperationNameChain = "chain" + OperationNameChat = "chat" + OperationNameInvokeAgent = SpanInvokeAgent + OperationNameGenerateContent = "generate_content" + OperationNameExecuteTool = SpanExecuteTool + + OperationTypeWorkflow = SpanKindWorkflow + OperationTypeLLM = SpanKindLLM + OperationTypeTool = SpanKindTool +) + +// Metric attribute keys and values +const ( + MetricAttrGenAIOperationName = "gen_ai_operation_name" + MetricAttrGenAIOperationType = "gen_ai_operation_type" + MetricAttrErrorType = "error_type" + MetricAttrTokenType = "token_type" + + TokenTypeInput = "input" + TokenTypeOutput = "output" + ErrorTypeError = "error" +) + // LLM attributes const ( AttrGenAIRequestModel = "gen_ai.request.model" @@ -151,10 +187,19 @@ const ( const ( EventGenAIContentPrompt = "gen_ai.content.prompt" EventGenAIContentCompletion = "gen_ai.content.completion" + EventGenAIUserMessage = "gen_ai.user.message" + EventGenAIChoice = "gen_ai.choice" ) // Tool attributes const ( + ADKAttrLLMRequestName = ADKAttributePrefix + "llm_request" + ADKAttrToolCallArgsName = ADKAttributePrefix + "tool_call_args" + ADKAttrToolResponseName = ADKAttributePrefix + "tool_response" + ADKAttrLLMResponseName = ADKAttributePrefix + "llm_response" + ADKAttrInvocationID = ADKAttributePrefix + "invocation_id" + ADKAttrSessionID = ADKAttributePrefix + 
"session_id" + AttrGenAIOperationName = "gen_ai.operation.name" AttrGenAIOperationType = "gen_ai.operation.type" AttrGenAIToolName = "gen_ai.tool.name" diff --git a/observability/initialize.go b/observability/initialize.go index 334797a..65f99e6 100644 --- a/observability/initialize.go +++ b/observability/initialize.go @@ -168,10 +168,6 @@ func initializeTraceProvider(ctx context.Context, cfg *configs.OpenTelemetryConf return false, nil } - if !cfg.EnableGlobalProvider { - return false, nil - } - exp, err := NewMultiExporter(ctx, cfg) if err != nil { return false, err @@ -191,10 +187,6 @@ func initializeMeterProvider(ctx context.Context, cfg *configs.OpenTelemetryConf return false, nil } - if !cfg.EnableGlobalProvider { - return false, nil - } - readers, err := NewMetricReader(ctx, cfg) if err != nil { return false, err diff --git a/observability/initialize_test.go b/observability/initialize_test.go index f13fb23..63953c6 100644 --- a/observability/initialize_test.go +++ b/observability/initialize_test.go @@ -16,7 +16,6 @@ package observability import ( "context" - "os" "testing" "github.com/stretchr/testify/assert" @@ -26,39 +25,6 @@ import ( "go.opentelemetry.io/otel/sdk/trace/tracetest" ) -func TestGetServiceName(t *testing.T) { - t.Run("EnvVar", func(t *testing.T) { - os.Setenv("OTEL_SERVICE_NAME", "env-service") - defer os.Unsetenv("OTEL_SERVICE_NAME") - assert.Equal(t, "env-service", getServiceName(&configs.OpenTelemetryConfig{})) - }) - - t.Run("ApmPlus", func(t *testing.T) { - cfg := &configs.OpenTelemetryConfig{ - ApmPlus: &configs.ApmPlusConfig{ServiceName: "apm-service"}, - } - assert.Equal(t, "apm-service", getServiceName(cfg)) - }) - - t.Run("CozeLoop", func(t *testing.T) { - cfg := &configs.OpenTelemetryConfig{ - CozeLoop: &configs.CozeLoopExporterConfig{ServiceName: "coze-service"}, - } - assert.Equal(t, "coze-service", getServiceName(cfg)) - }) - - t.Run("TLS", func(t *testing.T) { - cfg := &configs.OpenTelemetryConfig{ - TLS: 
&configs.TLSExporterConfig{ServiceName: "tls-service"}, - } - assert.Equal(t, "tls-service", getServiceName(cfg)) - }) - - t.Run("Unknown", func(t *testing.T) { - assert.Equal(t, "", getServiceName(&configs.OpenTelemetryConfig{})) - }) -} - func TestSetGlobalTracerProvider(t *testing.T) { // Save original provider to restore orig := otel.GetTracerProvider() @@ -88,18 +54,16 @@ func TestInitializeWithConfig(t *testing.T) { err := initWithConfig(context.Background(), nil) assert.ErrorIs(t, err, ErrNoExporters) - // Config with disabled global provider but valid exporter, should return ErrNoExporters. + // Config without exporters should return ErrNoExporters. cfg := &configs.OpenTelemetryConfig{ - EnableGlobalProvider: false, - Stdout: &configs.StdoutConfig{Enable: true}, + EnableMetrics: nil, } err = initWithConfig(context.Background(), cfg) assert.ErrorIs(t, err, ErrNoExporters) - // Config with global provider enabled and stdout + // Config with stdout exporter should initialize traces. cfgGlobal := &configs.OpenTelemetryConfig{ - EnableGlobalProvider: true, - Stdout: &configs.StdoutConfig{Enable: true}, + Stdout: &configs.StdoutConfig{Enable: true}, } err = initWithConfig(context.Background(), cfgGlobal) assert.NoError(t, err) diff --git a/observability/plugin.go b/observability/plugin.go index fd7a659..72a7fdc 100644 --- a/observability/plugin.go +++ b/observability/plugin.go @@ -16,11 +16,11 @@ package observability import ( "context" - "encoding/json" "time" "github.com/volcengine/veadk-go/configs" "github.com/volcengine/veadk-go/log" + "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -108,16 +108,10 @@ func (p *adkObservabilityPlugin) BeforeRun(ctx agent.InvocationContext) (*genai. log.Debug("Before Run", "InvocationID", ctx.InvocationID(), "SessionID", ctx.Session().ID(), "UserID", ctx.Session().UserID()) // 1. 
Start the 'invocation' span _, span := p.tracer.Start(context.Context(ctx), SpanInvocation, trace.WithSpanKind(trace.SpanKindServer)) - log.Debug("BeforeRun created a new invocation span", "span", span.SpanContext()) - - // Register internal ADK run span ID -> our veadk invocation span context. - adkSpan := trace.SpanFromContext(context.Context(ctx)) - if adkSpan.SpanContext().IsValid() { - GetRegistry().RegisterRunMapping(adkSpan.SpanContext().SpanID(), adkSpan.SpanContext().TraceID(), span.SpanContext(), span) - } // 2. Store in state for AfterRun _ = ctx.Session().State().Set(stateKeyInvocationSpan, span) + GetRegistry().RegisterInvocationSpan(span) setCommonAttributesFromInvocation(ctx, span) setWorkflowAttributes(span) @@ -130,12 +124,17 @@ func (p *adkObservabilityPlugin) BeforeRun(ctx agent.InvocationContext) (*genai. // Capture input from UserContent if userContent := ctx.UserContent(); userContent != nil { - if jsonIn, err := json.Marshal(userContent); err == nil { - val := string(jsonIn) + if val := serializeContentForTelemetry(userContent); val != "" { span.SetAttributes( attribute.String(AttrInputValue, val), attribute.String(AttrGenAIInput, val), ) + span.AddEvent(EventGenAIUserMessage, trace.WithAttributes( + attribute.String(AttrGenAIMessages, val), + )) + span.AddEvent(EventGenAIContentPrompt, trace.WithAttributes( + attribute.String(AttrInputValue, val), + )) } } @@ -146,85 +145,96 @@ func (p *adkObservabilityPlugin) BeforeRun(ctx agent.InvocationContext) (*genai. func (p *adkObservabilityPlugin) AfterRun(ctx agent.InvocationContext) { log.Debug("After Run", "InvocationID", ctx.InvocationID(), "SessionID", ctx.Session().ID(), "UserID", ctx.Session().UserID()) // 1. 
End the span - if s, _ := ctx.Session().State().Get(stateKeyInvocationSpan); s != nil { - span := s.(trace.Span) - log.Debug("AfterRun get a span from state", "span", span, "isRecording", span.IsRecording()) - - if span.IsRecording() { - // Capture final output if available - if cached, _ := ctx.Session().State().Get(stateKeyStreamingOutput); cached != nil { - if jsonOut, err := json.Marshal(cached); err == nil { - val := string(jsonOut) + s, _ := ctx.Session().State().Get(stateKeyInvocationSpan) + if s == nil { + return + } + + span := s.(trace.Span) + log.Debug("AfterRun get a span from state", "span", span, "isRecording", span.IsRecording()) + + if span.IsRecording() { + // Capture final output if available + if cached, _ := ctx.Session().State().Get(stateKeyStreamingOutput); cached != nil { + if content, ok := cached.(*genai.Content); ok { + if val := serializeContentForTelemetry(content); val != "" { span.SetAttributes( attribute.String(AttrOutputValue, val), attribute.String(AttrGenAIOutput, val), ) + span.AddEvent(EventGenAIChoice, trace.WithAttributes( + attribute.String(AttrGenAIChoice, val), + )) + span.AddEvent(EventGenAIContentCompletion, trace.WithAttributes( + attribute.String(AttrOutputValue, val), + )) } } - // Capture accumulated token usage for the root invocation span - meta := p.getSpanMetadata(ctx.Session().State()) + } + // Capture accumulated token usage for the root invocation span + meta := p.getSpanMetadata(ctx.Session().State()) - if meta.PromptTokens > 0 { - span.SetAttributes(attribute.Int64(AttrGenAIUsageInputTokens, meta.PromptTokens)) - } - if meta.CandidateTokens > 0 { - span.SetAttributes(attribute.Int64(AttrGenAIUsageOutputTokens, meta.CandidateTokens)) - } - if meta.TotalTokens > 0 { - span.SetAttributes(attribute.Int64(AttrGenAIUsageTotalTokens, meta.TotalTokens)) - } + if meta.PromptTokens > 0 { + span.SetAttributes(attribute.Int64(AttrGenAIUsageInputTokens, meta.PromptTokens)) + } + if meta.CandidateTokens > 0 { + 
span.SetAttributes(attribute.Int64(AttrGenAIUsageOutputTokens, meta.CandidateTokens)) + } + if meta.TotalTokens > 0 { + span.SetAttributes(attribute.Int64(AttrGenAIUsageTotalTokens, meta.TotalTokens)) + } - // Record final metrics for invocation - if !meta.StartTime.IsZero() { - if p.isMetricsEnabled() { - elapsed := time.Since(meta.StartTime).Seconds() - metricAttrs := []attribute.KeyValue{ - attribute.String("gen_ai_operation_name", "chain"), - attribute.String("gen_ai_operation_type", "workflow"), - attribute.String("gen_ai.system", GetModelProvider(context.Context(ctx))), + // Record final metrics for invocation + if !meta.StartTime.IsZero() { + if p.isMetricsEnabled() { + elapsed := time.Since(meta.StartTime).Seconds() + metricAttrs := []attribute.KeyValue{ + attribute.String(MetricAttrGenAIOperationName, OperationNameChain), + attribute.String(MetricAttrGenAIOperationType, OperationTypeWorkflow), + attribute.String(AttrGenAISystem, GetModelProvider(context.Context(ctx))), + } + RecordOperationDuration(context.Background(), elapsed, metricAttrs...) + RecordAPMPlusSpanLatency(context.Background(), elapsed, metricAttrs...) + + if isAgentKitRuntime { + agentKitsAttrs := []attribute.KeyValue{ + attribute.String(MetricAttrGenAIOperationName, OperationNameChain), + attribute.String(MetricAttrGenAIOperationType, OperationTypeWorkflow), } - RecordOperationDuration(context.Background(), elapsed, metricAttrs...) - RecordAPMPlusSpanLatency(context.Background(), elapsed, metricAttrs...) 
- - if isAgentKitRuntime { - agentKitsAttrs := []attribute.KeyValue{ - attribute.String("gen_ai_operation_name", "chain"), - attribute.String("gen_ai_operation_type", "workflow"), - } - - var errorCode string - eventLen := ctx.Session().Events().Len() - if eventLen > 0 { - lastEvent := ctx.Session().Events().At(eventLen - 1) - errorCode = lastEvent.ErrorCode - } - if errorCode != "" { - agentKitsAttrs = append(agentKitsAttrs, attribute.String("error_type", errorCode)) - } - RecordAgentKitDuration(context.Background(), elapsed, agentKitsAttrs...) + + var errorCode string + eventLen := ctx.Session().Events().Len() + if eventLen > 0 { + lastEvent := ctx.Session().Events().At(eventLen - 1) + errorCode = lastEvent.ErrorCode } + if errorCode != "" { + agentKitsAttrs = append(agentKitsAttrs, attribute.String(MetricAttrErrorType, errorCode)) + } + RecordAgentKitDuration(context.Background(), elapsed, agentKitsAttrs...) } } + } - // Clean up from global map with delay to allow children to be exported. - // Since we have multiple exporters, we wait long enough for all of them to finish. - adkSpan := trace.SpanFromContext(context.Context(ctx)) - if adkSpan.SpanContext().IsValid() { - id := adkSpan.SpanContext().SpanID() - tid := adkSpan.SpanContext().TraceID() - veadkTraceID := span.SpanContext().SpanID() - GetRegistry().ScheduleCleanup(tid, id, veadkTraceID) - } - - span.End() + // Clean up from global map with delay to allow children to be exported. + // Since we have multiple exporters, we wait long enough for all of them to finish. + adkSpan := trace.SpanFromContext(context.Context(ctx)) + if adkSpan.SpanContext().IsValid() { + tid := adkSpan.SpanContext().TraceID() + veadkInvocationSpanID := span.SpanContext().SpanID() + GetRegistry().ScheduleCleanup(tid, veadkInvocationSpanID) } + + span.End() } + } // BeforeModel is called before the LLM is called. 
func (p *adkObservabilityPlugin) BeforeModel(ctx agent.CallbackContext, req *model.LLMRequest) (*model.LLMResponse, error) { log.Debug("BeforeModel", "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) + p.tryBridgeTraceMappingFromCallback(ctx, "BeforeModel") // New ADK emits model/tool spans natively. Plugin only keeps metadata for metrics and invocation aggregation. meta := p.getSpanMetadata(ctx.State()) meta.StartTime = time.Now() @@ -247,9 +257,9 @@ func (p *adkObservabilityPlugin) AfterModel(ctx agent.CallbackContext, resp *mod metricAttrs := []attribute.KeyValue{ attribute.String(AttrGenAISystem, GetModelProvider(context.Context(ctx))), attribute.String("gen_ai_response_model", meta.ModelName), - attribute.String("gen_ai_operation_name", "chat"), - attribute.String("gen_ai_operation_type", "llm"), - attribute.String("error_type", "error"), + attribute.String(MetricAttrGenAIOperationName, OperationNameChat), + attribute.String(MetricAttrGenAIOperationType, OperationTypeLLM), + attribute.String(MetricAttrErrorType, ErrorTypeError), } RecordExceptions(context.Context(ctx), 1, metricAttrs...) 
} @@ -268,7 +278,7 @@ func (p *adkObservabilityPlugin) AfterModel(ctx agent.CallbackContext, resp *mod } if resp.UsageMetadata != nil { - p.handleUsageWithoutSpan(ctx, resp, finalModelName) + p.accumulateLLMUsageAndRecordMetrics(ctx, resp, finalModelName) } if resp.Content != nil { @@ -276,12 +286,7 @@ func (p *adkObservabilityPlugin) AfterModel(ctx agent.CallbackContext, resp *mod _ = ctx.State().Set(stateKeyStreamingOutput, resp.Content) } - parentSC := trace.SpanContext{} - if s, _ := ctx.State().Get(stateKeyInvocationSpan); s != nil { - if span, ok := s.(trace.Span); ok { - parentSC = span.SpanContext() - } - } + parentSC, _ := getInvocationSpanContextFromState(ctx.State()) adkSpan := trace.SpanFromContext(context.Context(ctx)) adkTraceID := trace.TraceID{} @@ -309,8 +314,8 @@ func (p *adkObservabilityPlugin) recordFinalResponseMetrics(ctx agent.CallbackCo metricAttrs := []attribute.KeyValue{ attribute.String(AttrGenAISystem, GetModelProvider(context.Context(ctx))), attribute.String("gen_ai_response_model", finalModelName), - attribute.String("gen_ai_operation_name", "chat"), - attribute.String("gen_ai_operation_type", "llm"), + attribute.String(MetricAttrGenAIOperationName, OperationNameChat), + attribute.String(MetricAttrGenAIOperationType, OperationTypeLLM), } if p.isMetricsEnabled() { RecordOperationDuration(context.Context(ctx), duration, metricAttrs...) @@ -319,28 +324,33 @@ func (p *adkObservabilityPlugin) recordFinalResponseMetrics(ctx agent.CallbackCo } } -func (p *adkObservabilityPlugin) handleUsageWithoutSpan(ctx agent.CallbackContext, resp *model.LLMResponse, modelName string) { +// accumulateLLMUsageAndRecordMetrics aggregates per-call LLM usage into invocation-level metadata, +// then emits LLM usage metrics for the current response. +// +// This remains in plugin callbacks because invocation-level accumulation requires cross-callback state. 
+func (p *adkObservabilityPlugin) accumulateLLMUsageAndRecordMetrics(ctx agent.CallbackContext, resp *model.LLMResponse, modelName string) { meta := p.getSpanMetadata(ctx.State()) currentPrompt := int64(resp.UsageMetadata.PromptTokenCount) currentCandidate := int64(resp.UsageMetadata.CandidatesTokenCount) currentTotal := int64(resp.UsageMetadata.TotalTokenCount) - if currentTotal == 0 && (currentPrompt > 0 || currentCandidate > 0) { - currentTotal = currentPrompt + currentCandidate - } - - meta.PromptTokens = meta.PrevPromptTokens + currentPrompt - meta.CandidateTokens = meta.PrevCandidateTokens + currentCandidate - meta.TotalTokens = meta.PrevTotalTokens + currentTotal + meta.PromptTokens, meta.CandidateTokens, meta.TotalTokens = mergeUsageTotals( + meta.PrevPromptTokens, + meta.PrevCandidateTokens, + meta.PrevTotalTokens, + currentPrompt, + currentCandidate, + currentTotal, + ) p.storeSpanMetadata(ctx.State(), meta) if p.isMetricsEnabled() { metricAttrs := []attribute.KeyValue{ attribute.String(AttrGenAISystem, GetModelProvider(ctx)), attribute.String("gen_ai_response_model", modelName), - attribute.String("gen_ai_operation_name", "chat"), - attribute.String("gen_ai_operation_type", "llm"), + attribute.String(MetricAttrGenAIOperationName, OperationNameChat), + attribute.String(MetricAttrGenAIOperationType, OperationTypeLLM), } RecordChatCount(context.Context(ctx), 1, metricAttrs...) if currentTotal > 0 && (currentPrompt > 0 || currentCandidate > 0) { @@ -349,73 +359,80 @@ func (p *adkObservabilityPlugin) handleUsageWithoutSpan(ctx agent.CallbackContex } } -// BeforeTool is called before a tool is executed. 
+func mergeUsageTotals(prevPrompt, prevCandidate, prevTotal, currentPrompt, currentCandidate, currentTotal int64) (int64, int64, int64) { + if currentTotal == 0 && (currentPrompt > 0 || currentCandidate > 0) { + currentTotal = currentPrompt + currentCandidate + } + + return prevPrompt + currentPrompt, prevCandidate + currentCandidate, prevTotal + currentTotal +} + +// BeforeTool is a lightweight debug-only callback. +// Tool span metrics and token estimation are handled in span processor / translator paths. func (p *adkObservabilityPlugin) BeforeTool(ctx tool.Context, tool tool.Tool, args map[string]any) (map[string]any, error) { log.Debug("BeforeTool", - "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) - // Note: In Google ADK-go, the execute_tool span is often not available in the context at this stage. - // We rely on VeADKTranslatedExporter (translator.go) to reconstruct tool attributes from the - // span after it is ended and exported. - - // Maintain metadata for metrics calculation - meta := p.getSpanMetadata(ctx.State()) - meta.StartTime = time.Now() - p.storeSpanMetadata(ctx.State(), meta) + "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName(), + "ToolName", tool.Name(), "ToolArgs", args) return nil, nil } -// AfterTool is called after a tool is executed. -func (p *adkObservabilityPlugin) AfterTool(ctx tool.Context, tool tool.Tool, args, result map[string]any, err error) (map[string]any, error) { +// AfterTool is a lightweight debug-only callback. +// Tool span metrics and token estimation are handled in span processor / translator paths. 
+func (p *adkObservabilityPlugin) AfterTool(ctx tool.Context, tool tool.Tool, args map[string]any, result map[string]any, err error) (map[string]any, error) { log.Debug("AfterTool", - "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) - // Metrics recording only - meta := p.getSpanMetadata(ctx.State()) - if !meta.StartTime.IsZero() { - duration := time.Since(meta.StartTime).Seconds() - metricAttrs := []attribute.KeyValue{ - attribute.String("gen_ai_operation_name", tool.Name()), - attribute.String("gen_ai_operation_type", "tool"), - attribute.String(AttrGenAISystem, GetModelProvider(context.Context(ctx))), - } - if p.isMetricsEnabled() { - RecordOperationDuration(context.Background(), duration, metricAttrs...) - RecordAPMPlusSpanLatency(context.Background(), duration, metricAttrs...) - } - - if p.isMetricsEnabled() { - // Tool Token Usage (Estimated) - - // Input Chars - var inputChars int64 - if argsJSON, err := json.Marshal(args); err == nil { - inputChars = int64(len(argsJSON)) - } - - // Output Chars - var outputChars int64 - if resultJSON, err := json.Marshal(result); err == nil { - outputChars = int64(len(resultJSON)) - } - - if inputChars > 0 { - RecordAPMPlusToolTokenUsage(context.Background(), inputChars/4, append(metricAttrs, attribute.String("token_type", "input"))...) - } - if outputChars > 0 { - RecordAPMPlusToolTokenUsage(context.Background(), outputChars/4, append(metricAttrs, attribute.String("token_type", "output"))...) - } - } - } + "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName(), + "ToolName", tool.Name(), "ToolArgs", args, "ToolResult", result, "ToolError", err) return nil, nil } // BeforeAgent is called before an agent execution. +// This is the primary trace-bridging point for adk trace -> veadk invocation trace. 
+// BeforeModel keeps an idempotent bridge as a secondary safety net. func (p *adkObservabilityPlugin) BeforeAgent(ctx agent.CallbackContext) (*genai.Content, error) { log.Debug("BeforeAgent", "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) + p.tryBridgeTraceMappingFromCallback(ctx, "BeforeAgent") return nil, nil } +func (p *adkObservabilityPlugin) tryBridgeTraceMappingFromCallback(ctx agent.CallbackContext, stage string) { + adkSC := trace.SpanFromContext(context.Context(ctx)).SpanContext() + veadkInvocationSC, ok := getInvocationSpanContextFromState(ctx.State()) + if !ok { + log.Debug("Skip trace mapping bridge: invocation span missing in state", "stage", stage) + return + } + + if registerTraceMappingIfPossible(GetRegistry(), adkSC, veadkInvocationSC) { + log.Debug("Bridged adk trace to veadk invocation trace", + "stage", stage, + "adk_trace_id", adkSC.TraceID().String(), + "veadk_trace_id", veadkInvocationSC.TraceID().String(), + ) + } +} + +func registerTraceMappingIfPossible(registry *TraceRegistry, adkSC, veadkSC trace.SpanContext) bool { + if registry == nil || !adkSC.IsValid() || !veadkSC.IsValid() { + return false + } + registry.RegisterTraceMapping(adkSC.TraceID(), veadkSC.TraceID()) + return true +} + +func getInvocationSpanContextFromState(state session.State) (trace.SpanContext, bool) { + if s, _ := state.Get(stateKeyInvocationSpan); s != nil { + if span, ok := s.(trace.Span); ok { + sc := span.SpanContext() + if sc.IsValid() { + return sc, true + } + } + } + return trace.SpanContext{}, false +} + // AfterAgent is called after an agent execution. 
func (p *adkObservabilityPlugin) AfterAgent(ctx agent.CallbackContext) (*genai.Content, error) { log.Debug("AfterAgent", diff --git a/observability/plugin_test.go b/observability/plugin_test.go new file mode 100644 index 0000000..e92cbe4 --- /dev/null +++ b/observability/plugin_test.go @@ -0,0 +1,63 @@ +package observability + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/otel/trace" +) + +func TestMergeUsageTotals(t *testing.T) { + t.Run("use provided total", func(t *testing.T) { + prompt, candidate, total := mergeUsageTotals(100, 200, 300, 10, 20, 40) + assert.Equal(t, int64(110), prompt) + assert.Equal(t, int64(220), candidate) + assert.Equal(t, int64(340), total) + }) + + t.Run("derive total when missing", func(t *testing.T) { + prompt, candidate, total := mergeUsageTotals(100, 200, 300, 10, 20, 0) + assert.Equal(t, int64(110), prompt) + assert.Equal(t, int64(220), candidate) + assert.Equal(t, int64(330), total) + }) + + t.Run("all zeros", func(t *testing.T) { + prompt, candidate, total := mergeUsageTotals(0, 0, 0, 0, 0, 0) + assert.Equal(t, int64(0), prompt) + assert.Equal(t, int64(0), candidate) + assert.Equal(t, int64(0), total) + }) +} + +func TestRegisterTraceMappingIfPossible(t *testing.T) { + t.Run("register when both span contexts are valid", func(t *testing.T) { + registry := GetRegistry() + + adkTraceID, _ := trace.TraceIDFromHex("11111111111111111111111111111111") + adkSpanID, _ := trace.SpanIDFromHex("1111111111111111") + veadkTraceID, _ := trace.TraceIDFromHex("22222222222222222222222222222222") + veadkSpanID, _ := trace.SpanIDFromHex("2222222222222222") + + adkSC := trace.NewSpanContext(trace.SpanContextConfig{TraceID: adkTraceID, SpanID: adkSpanID, TraceFlags: trace.FlagsSampled}) + veadkSC := trace.NewSpanContext(trace.SpanContextConfig{TraceID: veadkTraceID, SpanID: veadkSpanID, TraceFlags: trace.FlagsSampled}) + + ok := registerTraceMappingIfPossible(registry, adkSC, veadkSC) + assert.True(t, ok) + + 
mappedTraceID, exists := registry.GetVeadkTraceID(adkTraceID) + assert.True(t, exists) + assert.Equal(t, veadkTraceID, mappedTraceID) + }) + + t.Run("skip when adk span context is invalid", func(t *testing.T) { + registry := GetRegistry() + + veadkTraceID, _ := trace.TraceIDFromHex("33333333333333333333333333333333") + veadkSpanID, _ := trace.SpanIDFromHex("3333333333333333") + veadkSC := trace.NewSpanContext(trace.SpanContextConfig{TraceID: veadkTraceID, SpanID: veadkSpanID, TraceFlags: trace.FlagsSampled}) + + ok := registerTraceMappingIfPossible(registry, trace.SpanContext{}, veadkSC) + assert.False(t, ok) + }) +} diff --git a/observability/registry.go b/observability/registry.go index 0cf9ab8..353fc91 100644 --- a/observability/registry.go +++ b/observability/registry.go @@ -25,9 +25,6 @@ import ( // TraceRegistry manages the mapping between ADK-go's spans and VeADK spans. // It ensures thread-safe access and proper cleanup of resources. type TraceRegistry struct { - // adkSpanMap tracks google's adk SpanID (Run/Agent/LLM/Tool) -> VeADK SpanContext - adkSpanMap sync.Map - // toolCallMap tracks ToolCallID (string) -> *toolCallInfo // Consolidates: toolCallToVeadkLLMMap, toolInputs, toolOutputs toolCallMap sync.Map @@ -46,11 +43,16 @@ type TraceRegistry struct { shutdownChan chan struct{} } +const ( + traceCleanupQueueSize = 512 + traceCleanupDelay = 2 * time.Minute + traceCleanupTick = 10 * time.Second +) + type cleanupRequest struct { - adkTraceID trace.TraceID - internalRunID trace.SpanID - veadkSpanID trace.SpanID - deadline time.Time + adkTraceID trace.TraceID + veadkSpanID trace.SpanID + deadline time.Time } type toolCallInfo struct { @@ -60,7 +62,6 @@ type toolCallInfo struct { type traceInfos struct { veadkTraceID trace.TraceID - spanIDs []trace.SpanID toolCallIDs []string } @@ -75,7 +76,7 @@ func GetRegistry() *TraceRegistry { once.Do(func() { globalRegistry = &TraceRegistry{ adkTraceToVeadkTraceMap: make(map[trace.TraceID]*traceInfos), - cleanupQueue: 
make(chan cleanupRequest, 512), + cleanupQueue: make(chan cleanupRequest, traceCleanupQueueSize), shutdownChan: make(chan struct{}), } go globalRegistry.cleanupLoop() @@ -94,7 +95,7 @@ func (r *TraceRegistry) Shutdown() { } func (r *TraceRegistry) cleanupLoop() { - ticker := time.NewTicker(10 * time.Second) + ticker := time.NewTicker(traceCleanupTick) defer ticker.Stop() // Use a slice to store pending requests @@ -107,98 +108,58 @@ func (r *TraceRegistry) cleanupLoop() { case req := <-r.cleanupQueue: pendingRequests = append(pendingRequests, req) case <-ticker.C: - now := time.Now() - activeRequests := pendingRequests[:0] - for _, req := range pendingRequests { - if now.After(req.deadline) { - // Perform cleanup - r.UnregisterInvocationMapping(req.internalRunID, req.veadkSpanID) - - r.resourcesMu.Lock() - if res, ok := r.adkTraceToVeadkTraceMap[req.adkTraceID]; ok { - for _, sid := range res.spanIDs { - r.adkSpanMap.Delete(sid) - } - for _, tcid := range res.toolCallIDs { - r.toolCallMap.Delete(tcid) - } - delete(r.adkTraceToVeadkTraceMap, req.adkTraceID) - } - r.resourcesMu.Unlock() - } else { - activeRequests = append(activeRequests, req) - } - } - pendingRequests = activeRequests + pendingRequests = r.cleanupExpiredRequests(pendingRequests, time.Now()) } } } -func (r *TraceRegistry) getOrCreateTraceInfos(adkTraceID trace.TraceID) *traceInfos { - r.resourcesMu.Lock() - defer r.resourcesMu.Unlock() - - if res, ok := r.adkTraceToVeadkTraceMap[adkTraceID]; ok { - return res +func (r *TraceRegistry) cleanupExpiredRequests(pending []cleanupRequest, now time.Time) []cleanupRequest { + activeRequests := pending[:0] + for _, req := range pending { + if now.After(req.deadline) { + r.cleanupByTraceID(req.adkTraceID, req.veadkSpanID) + continue + } + activeRequests = append(activeRequests, req) } - res := &traceInfos{} - r.adkTraceToVeadkTraceMap[adkTraceID] = res - return res + return activeRequests } -// RegisterRunMapping links ADK's internal run span to our veadk 
invocation span. -func (r *TraceRegistry) RegisterRunMapping(adkSpanID trace.SpanID, adkTraceID trace.TraceID, veadkSC trace.SpanContext, veadkSpan trace.Span) { - if !adkSpanID.IsValid() || !veadkSC.IsValid() { - return - } - r.adkSpanMap.Store(adkSpanID, veadkSC) - r.activeInvocationSpans.Store(veadkSC.SpanID(), veadkSpan) +func (r *TraceRegistry) cleanupByTraceID(adkTraceID trace.TraceID, veadkSpanID trace.SpanID) { + r.activeInvocationSpans.Delete(veadkSpanID) - if adkTraceID.IsValid() { - res := r.getOrCreateTraceInfos(adkTraceID) - r.resourcesMu.Lock() - res.spanIDs = append(res.spanIDs, adkSpanID) - res.veadkTraceID = veadkSC.TraceID() - r.resourcesMu.Unlock() - } -} + r.resourcesMu.Lock() + defer r.resourcesMu.Unlock() -// RegisterAgentMapping links ADK's internal agent span to our veadk agent span. -func (r *TraceRegistry) RegisterAgentMapping(adkSpanID trace.SpanID, adkTraceID trace.TraceID, veadkSC trace.SpanContext) { - if !adkSpanID.IsValid() || !veadkSC.IsValid() { + res, ok := r.adkTraceToVeadkTraceMap[adkTraceID] + if !ok { return } - r.adkSpanMap.Store(adkSpanID, veadkSC) - if adkTraceID.IsValid() { - res := r.getOrCreateTraceInfos(adkTraceID) - r.resourcesMu.Lock() - res.spanIDs = append(res.spanIDs, adkSpanID) - r.resourcesMu.Unlock() + for _, tcid := range res.toolCallIDs { + r.toolCallMap.Delete(tcid) } + delete(r.adkTraceToVeadkTraceMap, adkTraceID) } -// RegisterLLMMapping links ADK's internal LLM span to our veadk LLM span. 
-func (r *TraceRegistry) RegisterLLMMapping(adkSpanID trace.SpanID, adkTraceID trace.TraceID, veadkSC trace.SpanContext) { - if !adkSpanID.IsValid() || !veadkSC.IsValid() { - return - } - r.adkSpanMap.Store(adkSpanID, veadkSC) +func (r *TraceRegistry) getOrCreateTraceInfos(adkTraceID trace.TraceID) *traceInfos { + r.resourcesMu.Lock() + defer r.resourcesMu.Unlock() - if adkTraceID.IsValid() { - res := r.getOrCreateTraceInfos(adkTraceID) - r.resourcesMu.Lock() - res.spanIDs = append(res.spanIDs, adkSpanID) - r.resourcesMu.Unlock() + if res, ok := r.adkTraceToVeadkTraceMap[adkTraceID]; ok { + return res } + res := &traceInfos{} + r.adkTraceToVeadkTraceMap[adkTraceID] = res + return res } -// RegisterToolMapping links a tool span (started by ADK) to its veadk parent (LLM call). -func (r *TraceRegistry) RegisterToolMapping(toolSpanID trace.SpanID, veadkParentSC trace.SpanContext) { - if !toolSpanID.IsValid() || !veadkParentSC.IsValid() { +// RegisterInvocationSpan tracks a live invocation span for shutdown flushing. 
+func (r *TraceRegistry) RegisterInvocationSpan(veadkSpan trace.Span) { + if veadkSpan == nil || !veadkSpan.SpanContext().IsValid() { return } - r.adkSpanMap.Store(toolSpanID, veadkParentSC) + r.activeInvocationSpans.Store(veadkSpan.SpanContext().SpanID(), veadkSpan) } func (r *TraceRegistry) getOrCreateToolCallInfo(toolCallID string) *toolCallInfo { @@ -219,11 +180,22 @@ func (r *TraceRegistry) RegisterToolCallMapping(toolCallID string, adkTraceID tr if adkTraceID.IsValid() { res := r.getOrCreateTraceInfos(adkTraceID) r.resourcesMu.Lock() - res.toolCallIDs = append(res.toolCallIDs, toolCallID) + if !containsString(res.toolCallIDs, toolCallID) { + res.toolCallIDs = append(res.toolCallIDs, toolCallID) + } r.resourcesMu.Unlock() } } +func containsString(items []string, target string) bool { + for _, item := range items { + if item == target { + return true + } + } + return false +} + // RegisterTraceMapping records a mapping from an internal adk TraceID to a veadk TraceID. func (r *TraceRegistry) RegisterTraceMapping(adkTraceID trace.TraceID, veadkTraceID trace.TraceID) { if !adkTraceID.IsValid() || !veadkTraceID.IsValid() { @@ -235,14 +207,6 @@ func (r *TraceRegistry) RegisterTraceMapping(adkTraceID trace.TraceID, veadkTrac r.resourcesMu.Unlock() } -// GetVeadkSpanContext finds the veadk replacement for an adk parent span ID. -func (r *TraceRegistry) GetVeadkSpanContext(adkSpanID trace.SpanID) (trace.SpanContext, bool) { - if val, ok := r.adkSpanMap.Load(adkSpanID); ok { - return val.(trace.SpanContext), true - } - return trace.SpanContext{}, false -} - // GetVeadkParentContextByToolCallID finds the veadk parent for a tool span by its logical ToolCallID. 
func (r *TraceRegistry) GetVeadkParentContextByToolCallID(toolCallID string) (trace.SpanContext, bool) { if toolCallID == "" { @@ -270,21 +234,14 @@ func (r *TraceRegistry) GetVeadkTraceID(adkTraceID trace.TraceID) (trace.TraceID return trace.TraceID{}, false } -// UnregisterInvocationMapping removes run-related mappings. -func (r *TraceRegistry) UnregisterInvocationMapping(adkSpanID trace.SpanID, veadkSpanID trace.SpanID) { - r.adkSpanMap.Delete(adkSpanID) - r.activeInvocationSpans.Delete(veadkSpanID) -} - // ScheduleCleanup schedules cleanup of all mappings related to an internal TraceID. // This is typically called when the trace is considered complete. -func (r *TraceRegistry) ScheduleCleanup(adkTraceID trace.TraceID, internalRunID trace.SpanID, veadkSpanID trace.SpanID) { +func (r *TraceRegistry) ScheduleCleanup(adkTraceID trace.TraceID, veadkSpanID trace.SpanID) { select { case r.cleanupQueue <- cleanupRequest{ - adkTraceID: adkTraceID, - internalRunID: internalRunID, - veadkSpanID: veadkSpanID, - deadline: time.Now().Add(2 * time.Minute), + adkTraceID: adkTraceID, + veadkSpanID: veadkSpanID, + deadline: time.Now().Add(traceCleanupDelay), }: default: log.Warn("trace cleanup queue is full") diff --git a/observability/serialization.go b/observability/serialization.go new file mode 100644 index 0000000..5376dac --- /dev/null +++ b/observability/serialization.go @@ -0,0 +1,184 @@ +package observability + +import ( + "encoding/base64" + "encoding/json" + "fmt" + "strings" + + "google.golang.org/genai" +) + +func serializeContentForTelemetry(content *genai.Content) string { + if content == nil { + return "" + } + + parts := make([]map[string]any, 0, len(content.Parts)) + for _, part := range content.Parts { + if part == nil { + continue + } + normalized := normalizePartForTelemetry(part) + if normalized != nil { + parts = append(parts, normalized) + } + } + + payload := map[string]any{ + "role": content.Role, + "parts": parts, + } + + b, err := 
json.Marshal(payload) + if err != nil { + return "" + } + return string(b) +} + +func normalizePartForTelemetry(part *genai.Part) map[string]any { + if part.Text != "" { + return map[string]any{ + "type": "text", + "text": part.Text, + } + } + + if part.FunctionCall != nil { + return map[string]any{ + "type": "function_call", + "id": part.FunctionCall.ID, + "name": part.FunctionCall.Name, + "args": part.FunctionCall.Args, + } + } + + if part.FunctionResponse != nil { + return map[string]any{ + "type": "function_response", + "id": part.FunctionResponse.ID, + "name": part.FunctionResponse.Name, + "response": part.FunctionResponse.Response, + } + } + + if part.FileData != nil { + return normalizeFileDataForTelemetry(part.FileData) + } + + if part.InlineData != nil { + return normalizeInlineDataForTelemetry(part.InlineData) + } + + return nil +} + +func normalizeFileDataForTelemetry(file *genai.FileData) map[string]any { + if file == nil { + return nil + } + + mimeType := file.MIMEType + name := file.DisplayName + url := file.FileURI + + if strings.HasPrefix(mimeType, "image/") { + return map[string]any{ + "type": "image_url", + "image_url": map[string]any{ + "name": name, + "url": url, + }, + } + } + + if strings.HasPrefix(mimeType, "video/") { + return map[string]any{ + "type": "video_url", + "video_url": map[string]any{ + "name": name, + "url": url, + }, + } + } + + if strings.HasPrefix(mimeType, "audio/") { + return map[string]any{ + "type": "audio_url", + "audio_url": map[string]any{ + "name": name, + "url": url, + }, + } + } + + return map[string]any{ + "type": "file", + "file": map[string]any{ + "name": name, + "url": url, + "mime_type": mimeType, + }, + } +} + +func normalizeInlineDataForTelemetry(blob *genai.Blob) map[string]any { + if blob == nil { + return nil + } + + mimeType := blob.MIMEType + name := blob.DisplayName + + if strings.HasPrefix(mimeType, "text/") { + return map[string]any{ + "type": "text", + "text": string(blob.Data), + } + } + + url := "" 
+ if len(blob.Data) > 0 && mimeType != "" { + url = fmt.Sprintf("data:%s;base64,%s", mimeType, base64.StdEncoding.EncodeToString(blob.Data)) + } + + if strings.HasPrefix(mimeType, "image/") { + return map[string]any{ + "type": "image_url", + "image_url": map[string]any{ + "name": name, + "url": url, + }, + } + } + + if strings.HasPrefix(mimeType, "video/") { + return map[string]any{ + "type": "video_url", + "video_url": map[string]any{ + "name": name, + "url": url, + }, + } + } + + if strings.HasPrefix(mimeType, "audio/") { + return map[string]any{ + "type": "audio_url", + "audio_url": map[string]any{ + "name": name, + "url": url, + }, + } + } + + return map[string]any{ + "type": "file", + "file": map[string]any{ + "name": name, + "mime_type": mimeType, + "data_base64": url, + }, + } +} diff --git a/observability/serialization_test.go b/observability/serialization_test.go new file mode 100644 index 0000000..8a86476 --- /dev/null +++ b/observability/serialization_test.go @@ -0,0 +1,61 @@ +package observability + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "google.golang.org/genai" +) + +func TestSerializeContentForTelemetry_Multimodal(t *testing.T) { + content := &genai.Content{ + Role: "user", + Parts: []*genai.Part{ + {Text: "hello"}, + {FileData: &genai.FileData{FileURI: "https://example.com/cat.jpg", MIMEType: "image/jpeg", DisplayName: "cat.jpg"}}, + {InlineData: &genai.Blob{MIMEType: "image/png", DisplayName: "chart.png", Data: []byte("png-bytes")}}, + }, + } + + serialized := serializeContentForTelemetry(content) + assert.NotEmpty(t, serialized) + + var decoded map[string]any + err := json.Unmarshal([]byte(serialized), &decoded) + assert.NoError(t, err) + assert.Equal(t, "user", decoded["role"]) + + parts, ok := decoded["parts"].([]any) + assert.True(t, ok) + assert.Len(t, parts, 3) + + imagePart, ok := parts[1].(map[string]any) + assert.True(t, ok) + assert.Equal(t, "image_url", imagePart["type"]) + + 
inlineImagePart, ok := parts[2].(map[string]any) + assert.True(t, ok) + assert.Equal(t, "image_url", inlineImagePart["type"]) + imageURL, ok := inlineImagePart["image_url"].(map[string]any) + assert.True(t, ok) + assert.True(t, strings.HasPrefix(imageURL["url"].(string), "data:image/png;base64,")) +} + +func TestSerializeContentForTelemetry_TextInlineData(t *testing.T) { + content := &genai.Content{ + Role: "user", + Parts: []*genai.Part{ + {InlineData: &genai.Blob{MIMEType: "text/plain", Data: []byte("inline text")}}, + }, + } + + serialized := serializeContentForTelemetry(content) + assert.NotEmpty(t, serialized) + assert.Contains(t, serialized, "inline text") +} + +func TestSerializeContentForTelemetry_Nil(t *testing.T) { + assert.Equal(t, "", serializeContentForTelemetry(nil)) +} diff --git a/observability/span_processor.go b/observability/span_processor.go index 56c02b0..2854548 100644 --- a/observability/span_processor.go +++ b/observability/span_processor.go @@ -11,6 +11,16 @@ import ( type veadkSpanProcessor struct{} +type semanticSpanKind int + +const ( + semanticSpanUnknown semanticSpanKind = iota + semanticSpanInvocation + semanticSpanAgent + semanticSpanLLM + semanticSpanTool +) + func NewVeADKSpanProcessor() sdktrace.SpanProcessor { return &veadkSpanProcessor{} } @@ -20,7 +30,32 @@ func (p *veadkSpanProcessor) OnStart(ctx context.Context, span sdktrace.ReadWrit p.setSemanticAttributes(ctx, span) } -func (p *veadkSpanProcessor) OnEnd(sdktrace.ReadOnlySpan) {} +func (p *veadkSpanProcessor) OnEnd(span sdktrace.ReadOnlySpan) { + if classifySemanticSpanKind(span.Name()) != semanticSpanTool { + return + } + + duration := span.EndTime().Sub(span.StartTime()).Seconds() + if duration <= 0 { + return + } + + toolName := strings.TrimPrefix(span.Name(), SpanPrefixExecuteTool) + if toolName == "" { + toolName = "" + } + + modelProvider := getStringAttribute(span.Attributes(), AttrGenAISystem, FallbackModelProvider) + metricAttrs := []attribute.KeyValue{ + 
attribute.String(MetricAttrGenAIOperationName, toolName), + attribute.String(MetricAttrGenAIOperationType, OperationTypeTool), + attribute.String(AttrGenAISystem, modelProvider), + } + + RecordOperationDuration(context.Background(), duration, metricAttrs...) + RecordAPMPlusSpanLatency(context.Background(), duration, metricAttrs...) + p.recordToolTokenUsageFromSpanAttributes(span, metricAttrs) +} func (p *veadkSpanProcessor) Shutdown(context.Context) error { return nil } @@ -94,42 +129,101 @@ func (p *veadkSpanProcessor) setCommonAttributes(ctx context.Context, span sdktr func (p *veadkSpanProcessor) setSemanticAttributes(ctx context.Context, span sdktrace.ReadWriteSpan) { name := span.Name() + kind := classifySemanticSpanKind(name) + switch kind { + case semanticSpanInvocation: + p.applyInvocationSemanticAttributes(span) + case semanticSpanAgent: + p.applyAgentSemanticAttributes(span, name) + case semanticSpanLLM: + p.applyLLMSemanticAttributes(span) + case semanticSpanTool: + p.applyToolSemanticAttributes(span, name) + } + + _ = ctx +} + +func classifySemanticSpanKind(name string) semanticSpanKind { switch { case name == SpanInvocation: - span.SetAttributes( - attribute.String(AttrGenAISpanKind, SpanKindWorkflow), - attribute.String(AttrGenAIOperationName, "chain"), - ) - case strings.HasPrefix(name, "invoke_agent "): - agentName := strings.TrimPrefix(name, "invoke_agent ") - if agentName == "" { - agentName = FallbackAgentName - } - span.SetAttributes( - attribute.String(AttrGenAISpanKind, SpanKindWorkflow), - attribute.String(AttrGenAIOperationName, "chain"), - attribute.String(AttrGenAIAgentName, agentName), - attribute.String(AttrAgentName, agentName), - attribute.String(AttrAgentNameDot, agentName), - ) - case strings.HasPrefix(name, "generate_content ") || name == SpanCallLLM: - span.SetAttributes( - attribute.String(AttrGenAISpanKind, SpanKindLLM), - attribute.String(AttrGenAIOperationName, "chat"), - attribute.String(AttrGenAIRequestType, "chat"), - ) - 
case strings.HasPrefix(name, "execute_tool "): - toolName := strings.TrimPrefix(name, "execute_tool ") - if toolName == "" { - toolName = "" + return semanticSpanInvocation + case strings.HasPrefix(name, SpanPrefixInvokeAgent): + return semanticSpanAgent + case strings.HasPrefix(name, SpanPrefixGenerateContent) || name == SpanCallLLM: + return semanticSpanLLM + case strings.HasPrefix(name, SpanPrefixExecuteTool): + return semanticSpanTool + default: + return semanticSpanUnknown + } +} + +func (p *veadkSpanProcessor) applyInvocationSemanticAttributes(span sdktrace.ReadWriteSpan) { + span.SetAttributes( + attribute.String(AttrGenAISpanKind, SpanKindWorkflow), + attribute.String(AttrGenAIOperationName, OperationNameChain), + ) +} + +func (p *veadkSpanProcessor) applyAgentSemanticAttributes(span sdktrace.ReadWriteSpan, spanName string) { + agentName := strings.TrimPrefix(spanName, SpanPrefixInvokeAgent) + if agentName == "" { + agentName = FallbackAgentName + } + span.SetAttributes( + attribute.String(AttrGenAISpanKind, SpanKindWorkflow), + attribute.String(AttrGenAIOperationName, OperationNameChain), + attribute.String(AttrGenAIAgentName, agentName), + attribute.String(AttrAgentName, agentName), + attribute.String(AttrAgentNameDot, agentName), + ) +} + +func (p *veadkSpanProcessor) applyLLMSemanticAttributes(span sdktrace.ReadWriteSpan) { + span.SetAttributes( + attribute.String(AttrGenAISpanKind, SpanKindLLM), + attribute.String(AttrGenAIOperationName, OperationNameChat), + attribute.String(AttrGenAIRequestType, OperationNameChat), + ) +} + +func (p *veadkSpanProcessor) applyToolSemanticAttributes(span sdktrace.ReadWriteSpan, spanName string) { + toolName := strings.TrimPrefix(spanName, SpanPrefixExecuteTool) + if toolName == "" { + toolName = "" + } + span.SetAttributes( + attribute.String(AttrGenAISpanKind, SpanKindTool), + attribute.String(AttrGenAIOperationName, OperationNameExecuteTool), + attribute.String(AttrGenAIToolName, toolName), + ) +} + +func 
getStringAttribute(attrs []attribute.KeyValue, key, fallback string) string { + for _, kv := range attrs { + if string(kv.Key) == key { + v := kv.Value.AsString() + if v != "" { + return v + } } - span.SetAttributes( - attribute.String(AttrGenAISpanKind, SpanKindTool), - attribute.String(AttrGenAIOperationName, "execute_tool"), - attribute.String(AttrGenAIToolName, toolName), - ) } + return fallback +} - _ = ctx +func (p *veadkSpanProcessor) recordToolTokenUsageFromSpanAttributes(span sdktrace.ReadOnlySpan, metricAttrs []attribute.KeyValue) { + inputRaw := getStringAttribute(span.Attributes(), ADKAttrToolCallArgsName, "") + outputRaw := getStringAttribute(span.Attributes(), ADKAttrToolResponseName, "") + + inputTokens := int64(len(inputRaw)) / 4 + outputTokens := int64(len(outputRaw)) / 4 + + if inputTokens > 0 { + RecordAPMPlusToolTokenUsage(context.Background(), inputTokens, append(metricAttrs, attribute.String(MetricAttrTokenType, TokenTypeInput))...) + } + if outputTokens > 0 { + RecordAPMPlusToolTokenUsage(context.Background(), outputTokens, append(metricAttrs, attribute.String(MetricAttrTokenType, TokenTypeOutput))...) 
+ } } diff --git a/observability/span_processor_test.go b/observability/span_processor_test.go new file mode 100644 index 0000000..85ed64f --- /dev/null +++ b/observability/span_processor_test.go @@ -0,0 +1,49 @@ +package observability + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/otel/attribute" +) + +func TestClassifySemanticSpanKind(t *testing.T) { + tests := []struct { + name string + spanName string + expect semanticSpanKind + }{ + {name: "invocation", spanName: SpanInvocation, expect: semanticSpanInvocation}, + {name: "agent", spanName: SpanPrefixInvokeAgent + "planner", expect: semanticSpanAgent}, + {name: "llm by generate_content prefix", spanName: SpanPrefixGenerateContent + "model", expect: semanticSpanLLM}, + {name: "llm by call_llm", spanName: SpanCallLLM, expect: semanticSpanLLM}, + {name: "tool", spanName: SpanPrefixExecuteTool + "search", expect: semanticSpanTool}, + {name: "unknown", spanName: "custom_span", expect: semanticSpanUnknown}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := classifySemanticSpanKind(tt.spanName) + assert.Equal(t, tt.expect, actual) + }) + } +} + +func TestGetStringAttribute(t *testing.T) { + attrs := []attribute.KeyValue{ + attribute.String("k1", "v1"), + attribute.String("empty", ""), + } + + t.Run("returns matched value", func(t *testing.T) { + assert.Equal(t, "v1", getStringAttribute(attrs, "k1", "fallback")) + }) + + t.Run("returns fallback when key missing", func(t *testing.T) { + assert.Equal(t, "fallback", getStringAttribute(attrs, "missing", "fallback")) + }) + + t.Run("returns fallback when value empty", func(t *testing.T) { + assert.Equal(t, "fallback", getStringAttribute(attrs, "empty", "fallback")) + }) +} diff --git a/observability/translator.go b/observability/translator.go index 0e72099..154ceae 100644 --- a/observability/translator.go +++ b/observability/translator.go @@ -18,6 +18,7 @@ import ( "context" "encoding/json" "strings" 
+ "time" "github.com/volcengine/veadk-go/log" "go.opentelemetry.io/otel/attribute" @@ -27,36 +28,17 @@ import ( ) var ( - gcpVertexAgentLLMRequestName = "gcp.vertex.agent.llm_request" - gcpVertexAgentToolCallArgsName = "gcp.vertex.agent.tool_call_args" - gcpVertexAgentToolResponseName = "gcp.vertex.agent.tool_response" - gcpVertexAgentLLMResponseName = "gcp.vertex.agent.llm_response" - gcpVertexAgentInvocationID = "gcp.vertex.agent.invocation_id" - gcpVertexAgentSessionID = "gcp.vertex.agent.session_id" - // ADKAttributeKeyMap maps ADK-specific attributes to standard GenAI attributes. ADKAttributeKeyMap = map[string]string{ - gcpVertexAgentLLMRequestName: AttrInputValue, - gcpVertexAgentLLMResponseName: AttrOutputValue, - gcpVertexAgentToolCallArgsName: AttrGenAIToolInput, - gcpVertexAgentToolResponseName: AttrGenAIToolOutput, - gcpVertexAgentInvocationID: AttrGenAIInvocationID, - gcpVertexAgentSessionID: AttrGenAISessionID, + ADKAttrLLMRequestName: AttrInputValue, + ADKAttrLLMResponseName: AttrOutputValue, + ADKAttrToolCallArgsName: AttrGenAIToolInput, + ADKAttrToolResponseName: AttrGenAIToolOutput, + ADKAttrInvocationID: AttrGenAIInvocationID, + ADKAttrSessionID: AttrGenAISessionID, } ) -// isMatch returns true if we should keep the span in the final output. -// At this point, we filter out "call_llm" span generated by ADK-go with scope name is "gcp.vertex.agent". -func isMatch(span trace.ReadOnlySpan) bool { - if span.InstrumentationScope().Name == "gcp.vertex.agent" { - name := span.Name() - if name == "call_llm" { - return false - } - } - return true -} - // VeADKTranslatedExporter wraps a SpanExporter and remaps ADK attributes to standard fields. type VeADKTranslatedExporter struct { trace.SpanExporter @@ -64,38 +46,19 @@ type VeADKTranslatedExporter struct { // ExportSpans filters and translates spans before exporting them to the underlying exporter. 
func (e *VeADKTranslatedExporter) ExportSpans(ctx context.Context, spans []trace.ReadOnlySpan) error { + if e.SpanExporter == nil { + return nil + } + translated := make([]trace.ReadOnlySpan, 0, len(spans)) - registry := GetRegistry() for _, s := range spans { - if !isMatch(s) { - continue - } - ts := &translatedSpan{ReadOnlySpan: s} translated = append(translated, ts) - // 1. Logic stitching via ToolCallID - toolCallID := "" - for _, kv := range s.Attributes() { - if string(kv.Key) == AttrGenAIToolCallID { - toolCallID = kv.Value.AsString() - break - } - } - - if toolCallID != "" { - if veadkParentSC, ok := registry.GetVeadkParentContextByToolCallID(toolCallID); ok { - // We found a match! Record this TraceID mapping to align other spans in the same trace (like merged spans) - registry.RegisterTraceMapping(s.SpanContext().TraceID(), veadkParentSC.TraceID()) - log.Debug("Matched tool via ToolCallID, established TraceID mapping", - "tool_call_id", toolCallID, - "adk_trace_id", s.SpanContext().TraceID().String(), - "veadk_trace_id", veadkParentSC.TraceID().String(), - ) - } + if isToolSpanForTraceMapping(s) { + registerTraceMappingFromToolCall(s) } - } if len(translated) == 0 { @@ -105,111 +68,285 @@ func (e *VeADKTranslatedExporter) ExportSpans(ctx context.Context, spans []trace return e.SpanExporter.ExportSpans(ctx, translated) } +func registerTraceMappingFromToolCall(span trace.ReadOnlySpan) { + if !isToolSpanForTraceMapping(span) { + return + } + + toolCallID := findToolCallID(span.Attributes()) + if toolCallID == "" { + return + } + + adkTraceID := span.SpanContext().TraceID() + if !adkTraceID.IsValid() { + return + } + + registry := GetRegistry() + if veadkParentSC, ok := registry.GetVeadkParentContextByToolCallID(toolCallID); ok { + registry.RegisterTraceMapping(adkTraceID, veadkParentSC.TraceID()) + log.Debug("Matched tool via ToolCallID, established TraceID mapping", + "tool_call_id", toolCallID, + "adk_trace_id", adkTraceID.String(), + "veadk_trace_id", 
veadkParentSC.TraceID().String(), + ) + } +} + +func isToolSpanForTraceMapping(span trace.ReadOnlySpan) bool { + return classifyTranslatedSpanKind(span.Name()) == translatedSpanTool +} + // translatedSpan wraps a ReadOnlySpan and intercepts calls to Attributes(). type translatedSpan struct { trace.ReadOnlySpan } +type translatedSpanKind int + +const ( + translatedSpanUnknown translatedSpanKind = iota + translatedSpanInvocation + translatedSpanAgent + translatedSpanLLM + translatedSpanTool +) + +type toolSpanRawData struct { + ToolName string + ToolDesc string + ToolArgs string + ToolCallID string + ToolResponse string +} + func (p *translatedSpan) Attributes() []attribute.KeyValue { attrs := p.ReadOnlySpan.Attributes() + kind := classifyTranslatedSpanKind(p.ReadOnlySpan.Name()) + existingKeys, raw := scanToolSpanRawData(attrs) + + newAttrs := p.processAttributesByKind(kind, attrs, existingKeys) + newAttrs = p.appendToolReconstructedAttributes(kind, newAttrs, raw) + newAttrs = appendToolSpanKindAttribute(newAttrs, raw) + + return newAttrs +} - // Track existing keys and tool-related fields +func scanToolSpanRawData(attrs []attribute.KeyValue) (map[string]bool, toolSpanRawData) { existingKeys := make(map[string]bool) - var toolName, toolDesc, toolArgs, toolCallID, toolResponse string + raw := toolSpanRawData{} - // First pass: scan for existing keys and raw data for _, kv := range attrs { - existingKeys[string(kv.Key)] = true key := string(kv.Key) - - // Collect raw data for reconstruction + existingKeys[key] = true switch key { case AttrGenAIToolName: - toolName = kv.Value.AsString() - case AttrGenAIToolDescription: // Note: ADK uses gen_ai.tool.description - toolDesc = kv.Value.AsString() - case gcpVertexAgentToolCallArgsName: - toolArgs = kv.Value.AsString() + raw.ToolName = kv.Value.AsString() + case AttrGenAIToolDescription: + raw.ToolDesc = kv.Value.AsString() + case ADKAttrToolCallArgsName: + raw.ToolArgs = kv.Value.AsString() case AttrGenAIToolCallID: - 
toolCallID = kv.Value.AsString() - case gcpVertexAgentToolResponseName: - toolResponse = kv.Value.AsString() + raw.ToolCallID = kv.Value.AsString() + case ADKAttrToolResponseName: + raw.ToolResponse = kv.Value.AsString() } } - newAttrs := p.processAttributes(attrs, existingKeys) + return existingKeys, raw +} + +func (p *translatedSpan) appendToolReconstructedAttributes(kind translatedSpanKind, attrs []attribute.KeyValue, raw toolSpanRawData) []attribute.KeyValue { + if kind != translatedSpanTool { + return attrs + } - // Dynamic Reconstruction: Tool Input/Output from raw attributes - if toolArgs != "" && toolName != "" { - if inputAttrs := p.reconstructToolInput(toolName, toolDesc, toolArgs); inputAttrs != nil { - newAttrs = append(newAttrs, inputAttrs...) + if raw.ToolArgs != "" && raw.ToolName != "" { + if inputAttrs := p.reconstructToolInput(raw.ToolName, raw.ToolDesc, raw.ToolArgs); inputAttrs != nil { + attrs = append(attrs, inputAttrs...) } } - if toolResponse != "" && toolCallID != "" { - if outputAttrs := p.reconstructToolOutput(toolName, toolCallID, toolResponse); outputAttrs != nil { - newAttrs = append(newAttrs, outputAttrs...) + if raw.ToolResponse != "" && raw.ToolCallID != "" { + if outputAttrs := p.reconstructToolOutput(raw.ToolName, raw.ToolCallID, raw.ToolResponse); outputAttrs != nil { + attrs = append(attrs, outputAttrs...) 
} } - // Enrich with Span Kind if it's determined to be a tool span - if toolName != "" || toolCallID != "" { - newAttrs = append(newAttrs, attribute.String(AttrGenAISpanKind, SpanKindTool)) + return attrs +} + +func appendToolSpanKindAttribute(attrs []attribute.KeyValue, raw toolSpanRawData) []attribute.KeyValue { + if raw.ToolName != "" || raw.ToolCallID != "" { + return append(attrs, attribute.String(AttrGenAISpanKind, SpanKindTool)) } + return attrs +} - return newAttrs +func classifyTranslatedSpanKind(name string) translatedSpanKind { + switch { + case name == SpanInvocation: + return translatedSpanInvocation + case strings.HasPrefix(name, SpanPrefixInvokeAgent): + return translatedSpanAgent + case strings.HasPrefix(name, SpanPrefixGenerateContent) || name == OperationNameGenerateContent || name == SpanCallLLM: + return translatedSpanLLM + case strings.HasPrefix(name, SpanPrefixExecuteTool): + return translatedSpanTool + default: + return translatedSpanUnknown + } } -func (p *translatedSpan) processAttributes(attrs []attribute.KeyValue, existingKeys map[string]bool) []attribute.KeyValue { +func (p *translatedSpan) processAttributesByKind(kind translatedSpanKind, attrs []attribute.KeyValue, existingKeys map[string]bool) []attribute.KeyValue { newAttrs := make([]attribute.KeyValue, 0, len(attrs)) for _, kv := range attrs { key := string(kv.Key) - if key == AttrGenAIOperationName { - op := kv.Value.AsString() - switch op { - case "generate_content": - kv = attribute.String(AttrGenAIOperationName, "chat") - case "invoke_agent": - kv = attribute.String(AttrGenAIOperationName, "chain") - } - } + kv = normalizeOperationNameBySpanKind(kind, key, kv) // 1. 
Map ADK internal attributes if not already present in standard form - if strings.HasPrefix(key, "gcp.vertex.agent.") { + if isADKInternalAttribute(key) { targetKey, ok := ADKAttributeKeyMap[key] if ok { - // Skip if we are going to reconstruct this field - if targetKey == AttrGenAIToolInput || targetKey == AttrGenAIToolOutput { + if shouldSkipMappedToolAttribute(targetKey) { continue } - // Only add mapped key if the target key doesn't already exist in the span - if !existingKeys[targetKey] { + if !hasExistingKey(existingKeys, targetKey) { newAttrs = append(newAttrs, attribute.KeyValue{Key: attribute.Key(targetKey), Value: kv.Value}) } } continue } - // 2. Patch gen_ai.system if needed - if key == AttrGenAISystem && kv.Value.AsString() == "gcp.vertex.agent" { - kv = attribute.String(AttrGenAISystem, "volcengine") - } + kv = patchGenAISystem(kv) newAttrs = append(newAttrs, kv) } return newAttrs } +func isADKInternalAttribute(key string) bool { + return strings.HasPrefix(key, ADKAttributePrefix) +} + +func shouldSkipMappedToolAttribute(targetKey string) bool { + return targetKey == AttrGenAIToolInput || targetKey == AttrGenAIToolOutput +} + +func hasExistingKey(existingKeys map[string]bool, key string) bool { + return existingKeys[key] +} + +func patchGenAISystem(kv attribute.KeyValue) attribute.KeyValue { + if string(kv.Key) == AttrGenAISystem && kv.Value.AsString() == ADKModelProvider { + return attribute.String(AttrGenAISystem, ModelProviderVolcengine) + } + return kv +} + +func normalizeOperationNameBySpanKind(kind translatedSpanKind, key string, kv attribute.KeyValue) attribute.KeyValue { + if key != AttrGenAIOperationName { + return kv + } + + op := kv.Value.AsString() + if kind == translatedSpanLLM && op == OperationNameGenerateContent { + return attribute.String(AttrGenAIOperationName, OperationNameChat) + } + if (kind == translatedSpanInvocation || kind == translatedSpanAgent) && op == OperationNameInvokeAgent { + return 
attribute.String(AttrGenAIOperationName, OperationNameChain) + } + + return kv +} + func (p *translatedSpan) Name() string { name := p.ReadOnlySpan.Name() - if strings.HasPrefix(name, "generate_content ") || name == "generate_content" { + if classifyTranslatedSpanKind(name) == translatedSpanLLM { return SpanCallLLM } return name } +func (p *translatedSpan) Events() []trace.Event { + baseEvents := p.ReadOnlySpan.Events() + if !p.isLLMSpan() { + return baseEvents + } + return appendLLMEventsFromAttributes(p.ReadOnlySpan.Attributes(), baseEvents, p.ReadOnlySpan.StartTime()) +} + +func (p *translatedSpan) isLLMSpan() bool { + return classifyTranslatedSpanKind(p.ReadOnlySpan.Name()) == translatedSpanLLM +} + +func appendLLMEventsFromAttributes(attrs []attribute.KeyValue, baseEvents []trace.Event, eventTime time.Time) []trace.Event { + hasEvent := map[string]bool{} + for _, ev := range baseEvents { + hasEvent[ev.Name] = true + } + + inputVal := "" + outputVal := "" + for _, kv := range attrs { + key := string(kv.Key) + switch key { + case ADKAttrLLMRequestName, AttrInputValue: + if inputVal == "" { + inputVal = kv.Value.AsString() + } + case ADKAttrLLMResponseName, AttrOutputValue: + if outputVal == "" { + outputVal = kv.Value.AsString() + } + } + } + + newEvents := make([]trace.Event, 0, 4) + if inputVal != "" { + if !hasEvent[EventGenAIUserMessage] { + newEvents = append(newEvents, trace.Event{ + Name: EventGenAIUserMessage, + Attributes: []attribute.KeyValue{attribute.String(AttrGenAIMessages, inputVal)}, + Time: eventTime, + }) + } + if !hasEvent[EventGenAIContentPrompt] { + newEvents = append(newEvents, trace.Event{ + Name: EventGenAIContentPrompt, + Attributes: []attribute.KeyValue{attribute.String(AttrInputValue, inputVal)}, + Time: eventTime, + }) + } + } + + if outputVal != "" { + if !hasEvent[EventGenAIChoice] { + newEvents = append(newEvents, trace.Event{ + Name: EventGenAIChoice, + Attributes: []attribute.KeyValue{attribute.String(AttrGenAIChoice, 
outputVal)}, + Time: eventTime, + }) + } + if !hasEvent[EventGenAIContentCompletion] { + newEvents = append(newEvents, trace.Event{ + Name: EventGenAIContentCompletion, + Attributes: []attribute.KeyValue{attribute.String(AttrOutputValue, outputVal)}, + Time: eventTime, + }) + } + } + + if len(newEvents) == 0 { + return baseEvents + } + + return append(baseEvents, newEvents...) +} + func (p *translatedSpan) reconstructToolInput(toolName, toolDesc, toolArgs string) []attribute.KeyValue { var paramsMap map[string]any if err := json.Unmarshal([]byte(toolArgs), ¶msMap); err == nil { @@ -224,6 +361,7 @@ func (p *translatedSpan) reconstructToolInput(toolName, toolDesc, toolArgs strin attribute.String(AttrGenAIToolInput, val), attribute.String(AttrCozeloopInput, val), attribute.String(AttrGenAIInput, val), + attribute.String(AttrInputValue, val), } } } @@ -245,6 +383,7 @@ func (p *translatedSpan) reconstructToolOutput(toolName, toolCallID, toolRespons attribute.String(AttrGenAIToolOutput, val), attribute.String(AttrCozeloopOutput, val), attribute.String(AttrGenAIOutput, val), + attribute.String(AttrOutputValue, val), } } } @@ -255,89 +394,90 @@ func (p *translatedSpan) SpanContext() oteltrace.SpanContext { sc := p.ReadOnlySpan.SpanContext() registry := GetRegistry() - toolCallID := "" - for _, kv := range p.ReadOnlySpan.Attributes() { + toolCallID := findToolCallID(p.ReadOnlySpan.Attributes()) + + if remapped, ok := p.tryRemapSpanContextByToolCallID(registry, sc, toolCallID); ok { + return remapped + } + + if remapped, ok := p.tryRemapSpanContextByTraceID(registry, sc); ok { + return remapped + } + + return sc +} + +func findToolCallID(attrs []attribute.KeyValue) string { + for _, kv := range attrs { if string(kv.Key) == AttrGenAIToolCallID { - toolCallID = kv.Value.AsString() - break + return kv.Value.AsString() } } + return "" +} +func (p *translatedSpan) tryRemapSpanContextByToolCallID(registry *TraceRegistry, sc oteltrace.SpanContext, toolCallID string) 
(oteltrace.SpanContext, bool) { if veadkParentSC, ok := registry.GetVeadkParentContextByToolCallID(toolCallID); ok { - return oteltrace.NewSpanContext(oteltrace.SpanContextConfig{ - TraceID: veadkParentSC.TraceID(), - SpanID: sc.SpanID(), - TraceFlags: sc.TraceFlags(), - TraceState: sc.TraceState(), - Remote: sc.IsRemote(), - }) - } - - // 2. Try global TraceID mapping (for spans in the same trace without their own tool_call_id) - if veadkParentSC, ok := registry.GetVeadkTraceID(sc.TraceID()); ok { - return oteltrace.NewSpanContext(oteltrace.SpanContextConfig{ - TraceID: veadkParentSC, - SpanID: sc.SpanID(), - TraceFlags: sc.TraceFlags(), - TraceState: sc.TraceState(), - Remote: sc.IsRemote(), - }) - } - - // 3. Fallback to Tool SpanID mapping - if veadkParentSC, ok := registry.GetVeadkSpanContext(sc.SpanID()); ok { - return oteltrace.NewSpanContext(oteltrace.SpanContextConfig{ - TraceID: veadkParentSC.TraceID(), - SpanID: sc.SpanID(), - TraceFlags: sc.TraceFlags(), - TraceState: sc.TraceState(), - Remote: sc.IsRemote(), - }) + return newSpanContextWithTraceID(sc, veadkParentSC.TraceID()), true } + return oteltrace.SpanContext{}, false +} - return sc +func (p *translatedSpan) tryRemapSpanContextByTraceID(registry *TraceRegistry, sc oteltrace.SpanContext) (oteltrace.SpanContext, bool) { + if veadkTraceID, ok := registry.GetVeadkTraceID(sc.TraceID()); ok { + return newSpanContextWithTraceID(sc, veadkTraceID), true + } + return oteltrace.SpanContext{}, false +} + +func newSpanContextWithTraceID(sc oteltrace.SpanContext, traceID oteltrace.TraceID) oteltrace.SpanContext { + return oteltrace.NewSpanContext(oteltrace.SpanContextConfig{ + TraceID: traceID, + SpanID: sc.SpanID(), + TraceFlags: sc.TraceFlags(), + TraceState: sc.TraceState(), + Remote: sc.IsRemote(), + }) } func (p *translatedSpan) Parent() oteltrace.SpanContext { parent := p.ReadOnlySpan.Parent() - sc := p.ReadOnlySpan.SpanContext() registry := GetRegistry() - // 1. 
Precise Re-parenting based on internal ParentID mapping - if parent.IsValid() { - if veadkSC, ok := registry.GetVeadkSpanContext(parent.SpanID()); ok { - return veadkSC - } + toolCallID := findToolCallID(p.ReadOnlySpan.Attributes()) + if remapped, ok := tryParentByToolCallID(registry, toolCallID); ok { + return remapped } - // 2. Try ToolCallID mapping (for tool spans that lost parent context) - toolCallID := "" - for _, kv := range p.ReadOnlySpan.Attributes() { - if string(kv.Key) == AttrGenAIToolCallID { - toolCallID = kv.Value.AsString() - break - } + if remapped, ok := tryParentByTraceID(registry, parent); ok { + return remapped } + return parent +} + +func tryParentByToolCallID(registry *TraceRegistry, toolCallID string) (oteltrace.SpanContext, bool) { if manualParentSC, ok := registry.GetVeadkParentContextByToolCallID(toolCallID); ok { - return manualParentSC + return manualParentSC, true } + return oteltrace.SpanContext{}, false +} - // 3. Fallback: Re-parent root spans if we have a direct mapping for this span ID. 
+func tryParentByTraceID(registry *TraceRegistry, parent oteltrace.SpanContext) (oteltrace.SpanContext, bool) { if !parent.IsValid() { - if manualSC, ok := registry.GetVeadkSpanContext(sc.SpanID()); ok { - return manualSC - } + return oteltrace.SpanContext{}, false } - - return parent + if veadkTraceID, ok := registry.GetVeadkTraceID(parent.TraceID()); ok { + return newSpanContextWithTraceID(parent, veadkTraceID), true + } + return oteltrace.SpanContext{}, false } func (p *translatedSpan) InstrumentationScope() instrumentation.Scope { scope := p.ReadOnlySpan.InstrumentationScope() // github.com/volcengine/veadk-go is the InstrumentationName defined in observability/constant.go - if scope.Name == "gcp.vertex.agent" || scope.Name == "veadk" || scope.Name == "github.com/volcengine/veadk-go" { - scope.Name = "openinference.instrumentation.veadk" + if scope.Name == ADKInstrumentationName || scope.Name == ADKLegacyScopeName || scope.Name == InstrumentationName { + scope.Name = OpenInferenceScopeName } scope.Version = Version return scope diff --git a/observability/translator_test.go b/observability/translator_test.go new file mode 100644 index 0000000..aa2dc2a --- /dev/null +++ b/observability/translator_test.go @@ -0,0 +1,42 @@ +package observability + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/trace" +) + +func TestAppendLLMEventsFromAttributes_BuildsPromptAndCompletionEvents(t *testing.T) { + attrs := []attribute.KeyValue{ + attribute.String(ADKAttrLLMRequestName, `{"role":"user","parts":[{"text":"hello"}]}`), + attribute.String(ADKAttrLLMResponseName, `{"role":"model","parts":[{"text":"hi"}]}`), + } + + out := appendLLMEventsFromAttributes(attrs, nil, time.Unix(1700000000, 0)) + assert.Len(t, out, 4) + assert.Equal(t, EventGenAIUserMessage, out[0].Name) + assert.Equal(t, EventGenAIContentPrompt, out[1].Name) + assert.Equal(t, EventGenAIChoice, out[2].Name) + 
assert.Equal(t, EventGenAIContentCompletion, out[3].Name) +} + +func TestAppendLLMEventsFromAttributes_DeduplicatesExistingEvents(t *testing.T) { + attrs := []attribute.KeyValue{ + attribute.String(AttrInputValue, `{"parts":[{"text":"hello"}]}`), + attribute.String(AttrOutputValue, `{"parts":[{"text":"hi"}]}`), + } + base := []trace.Event{ + {Name: EventGenAIUserMessage}, + {Name: EventGenAIChoice}, + } + + out := appendLLMEventsFromAttributes(attrs, base, time.Unix(1700000000, 0)) + assert.Len(t, out, 4) + assert.Equal(t, EventGenAIUserMessage, out[0].Name) + assert.Equal(t, EventGenAIChoice, out[1].Name) + assert.Equal(t, EventGenAIContentPrompt, out[2].Name) + assert.Equal(t, EventGenAIContentCompletion, out[3].Name) +} From 1d98a69462a3c48fbebb19d3af9a15dc9f0516a8 Mon Sep 17 00:00:00 2001 From: shunjiazhu Date: Sun, 22 Feb 2026 10:42:08 +0800 Subject: [PATCH 3/8] upgrade adk-go to v0.5.x --- go.mod | 16 ++++++++-------- go.sum | 32 ++++++++++++++++---------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/go.mod b/go.mod index 1303d00..f109eb0 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/stretchr/testify v1.11.1 github.com/volcengine/ve-tos-golang-sdk/v2 v2.7.26 github.com/volcengine/volcengine-go-sdk v1.1.53 - go.opentelemetry.io/otel v1.39.0 + go.opentelemetry.io/otel v1.40.0 go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.15.0 go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.15.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.39.0 @@ -23,13 +23,13 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.39.0 go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0 go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.39.0 - go.opentelemetry.io/otel/metric v1.39.0 - go.opentelemetry.io/otel/sdk v1.39.0 - go.opentelemetry.io/otel/sdk/log v0.15.0 - go.opentelemetry.io/otel/sdk/metric v1.39.0 - go.opentelemetry.io/otel/trace v1.39.0 
+ go.opentelemetry.io/otel/metric v1.40.0 + go.opentelemetry.io/otel/sdk v1.40.0 + go.opentelemetry.io/otel/sdk/log v0.16.0 + go.opentelemetry.io/otel/sdk/metric v1.40.0 + go.opentelemetry.io/otel/trace v1.40.0 golang.org/x/oauth2 v0.32.0 - google.golang.org/adk v0.4.1-0.20260213105330-147b75759ac6 + google.golang.org/adk v0.5.1-0.20260220130408-ecbe1ba152ed google.golang.org/genai v1.40.0 gopkg.in/go-playground/validator.v8 v8.18.2 gopkg.in/yaml.v3 v3.0.1 @@ -94,7 +94,7 @@ require ( go.opentelemetry.io/contrib/detectors/gcp v1.38.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect - go.opentelemetry.io/otel/log v0.14.0 // indirect + go.opentelemetry.io/otel/log v0.16.0 // indirect go.opentelemetry.io/proto/otlp v1.9.0 // indirect golang.org/x/arch v0.11.0 // indirect golang.org/x/crypto v0.45.0 // indirect diff --git a/go.sum b/go.sum index dfad47f..89aa532 100644 --- a/go.sum +++ b/go.sum @@ -207,8 +207,8 @@ go.opentelemetry.io/contrib/detectors/gcp v1.38.0 h1:ZoYbqX7OaA/TAikspPl3ozPI6iY go.opentelemetry.io/contrib/detectors/gcp v1.38.0/go.mod h1:SU+iU7nu5ud4oCb3LQOhIZ3nRLj6FNVrKgtflbaf2ts= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= -go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= -go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= +go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= +go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.15.0 h1:W+m0g+/6v3pa5PgVf2xoFMi5YtNR06WtS7ve5pcvLtM= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.15.0/go.mod 
h1:JM31r0GGZ/GU94mX8hN4D8v6e40aFlUECSQ48HaLgHM= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.15.0 h1:EKpiGphOYq3CYnIe2eX9ftUkyU+Y8Dtte8OaWyHJ4+I= @@ -227,20 +227,20 @@ go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0 h1:5gn2urDL/FBnK8 go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0/go.mod h1:0fBG6ZJxhqByfFZDwSwpZGzJU671HkwpWaNe2t4VUPI= go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.39.0 h1:8UPA4IbVZxpsD76ihGOQiFml99GPAEZLohDXvqHdi6U= go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.39.0/go.mod h1:MZ1T/+51uIVKlRzGw1Fo46KEWThjlCBZKl2LzY5nv4g= -go.opentelemetry.io/otel/log v0.14.0 h1:2rzJ+pOAZ8qmZ3DDHg73NEKzSZkhkGIua9gXtxNGgrM= -go.opentelemetry.io/otel/log v0.14.0/go.mod h1:5jRG92fEAgx0SU/vFPxmJvhIuDU9E1SUnEQrMlJpOno= -go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= -go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= -go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= -go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= -go.opentelemetry.io/otel/sdk/log v0.15.0 h1:WgMEHOUt5gjJE93yqfqJOkRflApNif84kxoHWS9VVHE= -go.opentelemetry.io/otel/sdk/log v0.15.0/go.mod h1:qDC/FlKQCXfH5hokGsNg9aUBGMJQsrUyeOiW5u+dKBQ= +go.opentelemetry.io/otel/log v0.16.0 h1:DeuBPqCi6pQwtCK0pO4fvMB5eBq6sNxEnuTs88pjsN4= +go.opentelemetry.io/otel/log v0.16.0/go.mod h1:rWsmqNVTLIA8UnwYVOItjyEZDbKIkMxdQunsIhpUMes= +go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= +go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= +go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8= +go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= +go.opentelemetry.io/otel/sdk/log v0.16.0 h1:e/b4bdlQwC5fnGtG3dlXUrNOnP7c8YLVSpSfEBIkTnI= 
+go.opentelemetry.io/otel/sdk/log v0.16.0/go.mod h1:JKfP3T6ycy7QEuv3Hj8oKDy7KItrEkus8XJE6EoSzw4= go.opentelemetry.io/otel/sdk/log/logtest v0.14.0 h1:Ijbtz+JKXl8T2MngiwqBlPaHqc4YCaP/i13Qrow6gAM= go.opentelemetry.io/otel/sdk/log/logtest v0.14.0/go.mod h1:dCU8aEL6q+L9cYTqcVOk8rM9Tp8WdnHOPLiBgp0SGOA= -go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= -go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= -go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= -go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= +go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw= +go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= +go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= +go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A= go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= @@ -292,8 +292,8 @@ golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/adk v0.4.1-0.20260213105330-147b75759ac6 h1:lrH4NZMy+Mw9mhDSR0wMAnNkUGQMfaW3DIoLRXfGl2c= -google.golang.org/adk v0.4.1-0.20260213105330-147b75759ac6/go.mod h1:K9gcJhkWBF2NcLvLh/oq4BM7QwPswwSX/gPqukLpgwQ= +google.golang.org/adk v0.5.1-0.20260220130408-ecbe1ba152ed 
h1:PTGgixN20I1zmA5YWCBX4+3Qzge87ZUnNjHeUsPyimY= +google.golang.org/adk v0.5.1-0.20260220130408-ecbe1ba152ed/go.mod h1:W0RyHt+JXfZHA1VnxeGALRZeqAlp54nv2cw7Sn7M5Jc= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genai v1.40.0 h1:kYxyQSH+vsib8dvsgyLJzsVEIv5k3ZmHJyVqdvGncmc= From a7fafaa2fb4ef46a37254232b0d168c1b7c70d97 Mon Sep 17 00:00:00 2001 From: shunjiazhu Date: Tue, 24 Feb 2026 23:42:05 +0800 Subject: [PATCH 4/8] upgrade adk-go and add http middleware --- apps/agentkit_server_app/app.go | 10 ++-- go.mod | 28 +++++----- go.sum | 29 +++++++++++ observability/http.go | 90 +++++++++++++++++++++++++++++++++ observability/plugin.go | 21 ++++---- 5 files changed, 150 insertions(+), 28 deletions(-) create mode 100644 observability/http.go diff --git a/apps/agentkit_server_app/app.go b/apps/agentkit_server_app/app.go index 5e4a1ab..f3b1d05 100644 --- a/apps/agentkit_server_app/app.go +++ b/apps/agentkit_server_app/app.go @@ -24,11 +24,11 @@ import ( "github.com/volcengine/veadk-go/apps/a2a_app" "github.com/volcengine/veadk-go/apps/simple_app" "github.com/volcengine/veadk-go/log" + "github.com/volcengine/veadk-go/observability" "google.golang.org/adk/cmd/launcher" "google.golang.org/adk/cmd/launcher/web/webui" "google.golang.org/adk/server/adkrest" -) - + ) const serverName = "agentkit server" type agentkitServerApp struct { @@ -97,9 +97,9 @@ func (a *agentkitServerApp) SetupRouters(router *mux.Router, config *apps.RunCon // Wrap it with CORS middleware corsHandler := corsWithArgs(a.GetWebUrl())(apiHandler) - router.Methods("GET", "POST", "DELETE", "OPTIONS").PathPrefix(fmt.Sprintf("%s/", a.ApiPathPrefix)).Handler( - http.StripPrefix(a.ApiPathPrefix, corsHandler), - ) + // Wrap with OpenTelemetry instrumentation first, then add to router + wrappedHandler := observability.HTTPMiddleware(http.StripPrefix(a.ApiPathPrefix, 
corsHandler)) + router.Methods("GET", "POST", "DELETE", "OPTIONS").PathPrefix(fmt.Sprintf("%s/", a.ApiPathPrefix)).Handler(wrappedHandler) log.Infof(" api: you can access API using %s", a.GetAPIPath()) log.Infof(" api: for instance: %s/list-apps", a.GetAPIPath()) diff --git a/go.mod b/go.mod index f109eb0..199fbe6 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ require ( github.com/volcengine/volcengine-go-sdk v1.1.53 go.opentelemetry.io/otel v1.40.0 go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.15.0 - go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.15.0 + go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.16.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.39.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.39.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.39.0 @@ -28,8 +28,8 @@ require ( go.opentelemetry.io/otel/sdk/log v0.16.0 go.opentelemetry.io/otel/sdk/metric v1.40.0 go.opentelemetry.io/otel/trace v1.40.0 - golang.org/x/oauth2 v0.32.0 - google.golang.org/adk v0.5.1-0.20260220130408-ecbe1ba152ed + golang.org/x/oauth2 v0.34.0 + google.golang.org/adk v0.5.1-0.20260223191314-02e275dd84a3 google.golang.org/genai v1.40.0 gopkg.in/go-playground/validator.v8 v8.18.2 gopkg.in/yaml.v3 v3.0.1 @@ -41,7 +41,7 @@ require ( cloud.google.com/go v0.123.0 // indirect cloud.google.com/go/auth v0.17.0 // indirect cloud.google.com/go/compute/metadata v0.9.0 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 // indirect github.com/awalterschulze/gographviz v2.0.3+incompatible // indirect github.com/bluele/gcache v0.0.2 // indirect github.com/bytedance/sonic v1.14.2 // indirect @@ -63,7 +63,7 @@ require ( github.com/googleapis/gax-go/v2 v2.15.0 // indirect github.com/gopherjs/gopherjs v1.17.2 // indirect github.com/gorilla/websocket 
v1.5.3 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/pgx/v5 v5.6.0 // indirect @@ -91,22 +91,22 @@ require ( github.com/yosida95/uritemplate/v3 v3.0.2 // indirect go.mongodb.org/mongo-driver v1.17.6 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/contrib/detectors/gcp v1.38.0 // indirect + go.opentelemetry.io/contrib/detectors/gcp v1.40.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect go.opentelemetry.io/otel/log v0.16.0 // indirect go.opentelemetry.io/proto/otlp v1.9.0 // indirect golang.org/x/arch v0.11.0 // indirect - golang.org/x/crypto v0.45.0 // indirect + golang.org/x/crypto v0.47.0 // indirect golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0 // indirect - golang.org/x/net v0.47.0 // indirect - golang.org/x/sync v0.18.0 // indirect + golang.org/x/net v0.49.0 // indirect + golang.org/x/sync v0.19.0 // indirect golang.org/x/sys v0.40.0 // indirect - golang.org/x/text v0.31.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect - google.golang.org/grpc v1.77.0 // indirect - google.golang.org/protobuf v1.36.10 // indirect + golang.org/x/text v0.33.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect + google.golang.org/grpc v1.78.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect gopkg.in/go-playground/assert.v1 v1.2.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect rsc.io/omap v1.2.0 // 
indirect diff --git a/go.sum b/go.sum index 89aa532..c849602 100644 --- a/go.sum +++ b/go.sum @@ -8,6 +8,8 @@ cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCB github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 h1:sBEjpZlNHzK1voKq9695PJSX2o5NEXl7/OL3coiIY0c= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 h1:DHa2U07rk8syqvCge0QIGMCE1WxGj9njT44GH7zNJLQ= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/a2aproject/a2a-go v0.3.3 h1:NqGDw2c8hCSW3/9MakeeRpw5yCZUUmW2Y/yINV15GwQ= @@ -105,6 +107,8 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 h1:X+2YciYSxvMQK0UZ7sg45ZVabVZBeBuvMkmuI2V3Fak= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7/go.mod h1:lW34nIZuQ8UDPdkon5fmfp2l3+ZkQ2me/+oecHYLOII= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= @@ -205,6 +209,8 @@ 
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/detectors/gcp v1.38.0 h1:ZoYbqX7OaA/TAikspPl3ozPI6iY6LiIY9I8cUfm+pJs= go.opentelemetry.io/contrib/detectors/gcp v1.38.0/go.mod h1:SU+iU7nu5ud4oCb3LQOhIZ3nRLj6FNVrKgtflbaf2ts= +go.opentelemetry.io/contrib/detectors/gcp v1.40.0 h1:Awaf8gmW99tZTOWqkLCOl6aw1/rxAWVlHsHIZ3fT2sA= +go.opentelemetry.io/contrib/detectors/gcp v1.40.0/go.mod h1:99OY9ZCqyLkzJLTh5XhECpLRSxcZl+ZDKBEO+jMBFR4= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= @@ -213,6 +219,8 @@ go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.15.0 h1:W+m0g+/6v go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.15.0/go.mod h1:JM31r0GGZ/GU94mX8hN4D8v6e40aFlUECSQ48HaLgHM= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.15.0 h1:EKpiGphOYq3CYnIe2eX9ftUkyU+Y8Dtte8OaWyHJ4+I= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.15.0/go.mod h1:nWFP7C+T8TygkTjJ7mAyEaFaE7wNfms3nV/vexZ6qt0= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.16.0 h1:djrxvDxAe44mJUrKataUbOhCKhR3F8QCyWucO16hTQs= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.16.0/go.mod h1:dt3nxpQEiSoKvfTVxp3TUg5fHPLhKtbcnN3Z1I1ePD0= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.39.0 h1:cEf8jF6WbuGQWUVcqgyWtTR0kOOAWY1DYZ+UhvdmQPw= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.39.0/go.mod h1:k1lzV5n5U3HkGvTCJHraTAGJ7MqsgL1wrGwTj1Isfiw= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.39.0 h1:nKP4Z2ejtHn3yShBb+2KawiXgpn8In5cT7aO2wXuOTE= @@ -237,6 +245,7 
@@ go.opentelemetry.io/otel/sdk/log v0.16.0 h1:e/b4bdlQwC5fnGtG3dlXUrNOnP7c8YLVSpSf go.opentelemetry.io/otel/sdk/log v0.16.0/go.mod h1:JKfP3T6ycy7QEuv3Hj8oKDy7KItrEkus8XJE6EoSzw4= go.opentelemetry.io/otel/sdk/log/logtest v0.14.0 h1:Ijbtz+JKXl8T2MngiwqBlPaHqc4YCaP/i13Qrow6gAM= go.opentelemetry.io/otel/sdk/log/logtest v0.14.0/go.mod h1:dCU8aEL6q+L9cYTqcVOk8rM9Tp8WdnHOPLiBgp0SGOA= +go.opentelemetry.io/otel/sdk/log/logtest v0.16.0 h1:/XVkpZ41rVRTP4DfMgYv1nEtNmf65XPPyAdqV90TMy4= go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw= go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= @@ -250,6 +259,8 @@ golang.org/x/arch v0.11.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= +golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= +golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0 h1:985EYyeCOxTpcgOTJpflJUwOeEz0CQOdPt73OzpE9F8= golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI= @@ -262,15 +273,21 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/net v0.49.0 
h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= +golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -282,6 +299,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= +golang.org/x/text v0.33.0 
h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= +golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -294,6 +313,8 @@ gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= google.golang.org/adk v0.5.1-0.20260220130408-ecbe1ba152ed h1:PTGgixN20I1zmA5YWCBX4+3Qzge87ZUnNjHeUsPyimY= google.golang.org/adk v0.5.1-0.20260220130408-ecbe1ba152ed/go.mod h1:W0RyHt+JXfZHA1VnxeGALRZeqAlp54nv2cw7Sn7M5Jc= +google.golang.org/adk v0.5.1-0.20260223191314-02e275dd84a3 h1:kPtfbli/dNIOdJI2I1zCTFa7ZQZqQ0r8Caa7hKywb2I= +google.golang.org/adk v0.5.1-0.20260223191314-02e275dd84a3/go.mod h1:KRMCcfekE0mVn4VyjhIzAalbD1bRaB4TfwqP4Hog3qA= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genai v1.40.0 h1:kYxyQSH+vsib8dvsgyLJzsVEIv5k3ZmHJyVqdvGncmc= @@ -303,13 +324,19 @@ google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98 google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls= google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= +google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 h1:merA0rdPeUV3YIIfHHcH4qBkiQAc1nfCKSI7lB4cV2M= +google.golang.org/genproto/googleapis/api 
v0.0.0-20260128011058-8636f8732409/go.mod h1:fl8J1IvUjCilwZzQowmw2b7HQB2eAuYBabMXzWurF+I= google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 h1:H86B94AW+VfJWDqFeEbBPhEtHzJwJfTbgE2lZa54ZAQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM= google.golang.org/grpc v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig= +google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= +google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -323,6 +350,8 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= 
+google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/observability/http.go b/observability/http.go new file mode 100644 index 0000000..1b5e993 --- /dev/null +++ b/observability/http.go @@ -0,0 +1,90 @@ +// Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package observability + +import ( + "context" + "net/http" + + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" +) + +// HTTPMiddleware returns an HTTP middleware that instruments incoming HTTP requests with OpenTelemetry. +// It creates spans for each HTTP request and propagates trace context. 
+// +// Usage: +// +// import ( +// "github.com/volcengine/veadk-go/observability" +// ) +// +// // Wrap your handler +// wrappedHandler := observability.HTTPMiddleware(originalHandler) +// http.Handle("/", wrappedHandler) +func HTTPMiddleware(next http.Handler) http.Handler { + return otelhttp.NewHandler( + next, + InstrumentationName, + otelhttp.WithTracerProvider(otel.GetTracerProvider()), + otelhttp.WithPublicEndpoint(), + otelhttp.WithSpanNameFormatter(func(operation string, r *http.Request) string { + return "HTTP " + r.Method + " " + r.URL.Path + }), + ) +} +// StartSpan starts a new span as a child of the span in the context. +// This can be used within an HTTP handler to start a span for a specific operation. +// +// Usage: +// +// func handler(w http.ResponseWriter, r *http.Request) { +// ctx, span := observability.StartSpan(r.Context(), "operation_name") +// defer span.End() +// // ... do work ... +// } +func StartSpan(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { + tracer := otel.GetTracerProvider().Tracer(InstrumentationName) + return tracer.Start(ctx, name, opts...) +} + +// SetAttributes adds attributes to a span. +func SetAttributes(span trace.Span, attrs ...attribute.KeyValue) { + if span == nil { + return + } + span.SetAttributes(attrs...) +} + +// SetHTTPAttributes adds HTTP-specific attributes to a span. +func SetHTTPAttributes(span trace.Span, method, path, route string) { + if span == nil { + return + } + + attrs := []attribute.KeyValue{ + attribute.String("http.method", method), + attribute.String("http.route", route), + attribute.String("http.target", path), + } + span.SetAttributes(attrs...) +} + +// GetSpanFromContext extracts the span from a context. 
+func GetSpanFromContext(ctx context.Context) trace.Span { + return trace.SpanFromContext(ctx) +} diff --git a/observability/plugin.go b/observability/plugin.go index 72a7fdc..0de1681 100644 --- a/observability/plugin.go +++ b/observability/plugin.go @@ -106,10 +106,10 @@ func (p *adkObservabilityPlugin) isMetricsEnabled() bool { // BeforeRun is called before an agent run starts. func (p *adkObservabilityPlugin) BeforeRun(ctx agent.InvocationContext) (*genai.Content, error) { log.Debug("Before Run", "InvocationID", ctx.InvocationID(), "SessionID", ctx.Session().ID(), "UserID", ctx.Session().UserID()) - // 1. Start the 'invocation' span + // 1. Start the 'invocation' span - ADK doesn't create this yet + // tracer.Start() automatically uses any existing span from context as parent + // (e.g. spans from HTTP middleware will be the parent) _, span := p.tracer.Start(context.Context(ctx), SpanInvocation, trace.WithSpanKind(trace.SpanKindServer)) - - // 2. Store in state for AfterRun _ = ctx.Session().State().Set(stateKeyInvocationSpan, span) GetRegistry().RegisterInvocationSpan(span) @@ -227,7 +227,6 @@ func (p *adkObservabilityPlugin) AfterRun(ctx agent.InvocationContext) { span.End() } - } // BeforeModel is called before the LLM is called. @@ -235,7 +234,7 @@ func (p *adkObservabilityPlugin) BeforeModel(ctx agent.CallbackContext, req *mod log.Debug("BeforeModel", "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) p.tryBridgeTraceMappingFromCallback(ctx, "BeforeModel") - // New ADK emits model/tool spans natively. Plugin only keeps metadata for metrics and invocation aggregation. + // ADK now emits model spans natively. Plugin only keeps metadata for metrics and invocation aggregation. 
meta := p.getSpanMetadata(ctx.State()) meta.StartTime = time.Now() meta.PrevPromptTokens = meta.PromptTokens @@ -369,19 +368,23 @@ func mergeUsageTotals(prevPrompt, prevCandidate, prevTotal, currentPrompt, curre // BeforeTool is a lightweight debug-only callback. // Tool span metrics and token estimation are handled in span processor / translator paths. -func (p *adkObservabilityPlugin) BeforeTool(ctx tool.Context, tool tool.Tool, args map[string]any) (map[string]any, error) { +// BeforeTool is a lightweight debug-only callback. +// Tool span metrics and token estimation are handled in span processor / translator paths. +func (p *adkObservabilityPlugin) BeforeTool(ctx tool.Context, t tool.Tool, args map[string]any) (map[string]any, error) { log.Debug("BeforeTool", "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName(), - "ToolName", tool.Name(), "ToolArgs", args) + "ToolName", t.Name(), "ToolArgs", args) return nil, nil } // AfterTool is a lightweight debug-only callback. // Tool span metrics and token estimation are handled in span processor / translator paths. -func (p *adkObservabilityPlugin) AfterTool(ctx tool.Context, tool tool.Tool, args map[string]any, result map[string]any, err error) (map[string]any, error) { +// AfterTool is a lightweight debug-only callback. +// Tool span metrics and token estimation are handled in span processor / translator paths. 
+func (p *adkObservabilityPlugin) AfterTool(ctx tool.Context, t tool.Tool, args map[string]any, result map[string]any, err error) (map[string]any, error) { log.Debug("AfterTool", "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName(), - "ToolName", tool.Name(), "ToolArgs", args, "ToolResult", result, "ToolError", err) + "ToolName", t.Name(), "ToolArgs", args, "ToolResult", result, "ToolError", err) return nil, nil } From 4a3d1a6a437e34fe4b4f665faa9170ee6ca7cf03 Mon Sep 17 00:00:00 2001 From: shunjiazhu Date: Wed, 25 Feb 2026 17:40:06 +0800 Subject: [PATCH 5/8] go mod tidy --- go.mod | 2 +- go.sum | 35 +++-------------------------------- 2 files changed, 4 insertions(+), 33 deletions(-) diff --git a/go.mod b/go.mod index 199fbe6..eb008c2 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/stretchr/testify v1.11.1 github.com/volcengine/ve-tos-golang-sdk/v2 v2.7.26 github.com/volcengine/volcengine-go-sdk v1.1.53 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 go.opentelemetry.io/otel v1.40.0 go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.15.0 go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.16.0 @@ -92,7 +93,6 @@ require ( go.mongodb.org/mongo-driver v1.17.6 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/detectors/gcp v1.40.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect go.opentelemetry.io/otel/log v0.16.0 // indirect go.opentelemetry.io/proto/otlp v1.9.0 // indirect diff --git a/go.sum b/go.sum index c849602..d95dc58 100644 --- a/go.sum +++ b/go.sum @@ -6,8 +6,6 @@ cloud.google.com/go/auth v0.17.0/go.mod h1:6wv/t5/6rOPAX4fJiRjKkJCvswLwdet7G8+UG cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs=
cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 h1:sBEjpZlNHzK1voKq9695PJSX2o5NEXl7/OL3coiIY0c= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 h1:DHa2U07rk8syqvCge0QIGMCE1WxGj9njT44GH7zNJLQ= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= @@ -105,8 +103,6 @@ github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 h1:X+2YciYSxvMQK0UZ7sg45ZVabVZBeBuvMkmuI2V3Fak= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7/go.mod h1:lW34nIZuQ8UDPdkon5fmfp2l3+ZkQ2me/+oecHYLOII= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= @@ -207,8 +203,6 @@ go.mongodb.org/mongo-driver v1.17.6 h1:87JUG1wZfWsr6rIz3ZmpH90rL5tea7O3IHuSwHUps go.mongodb.org/mongo-driver v1.17.6/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod 
h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/detectors/gcp v1.38.0 h1:ZoYbqX7OaA/TAikspPl3ozPI6iY6LiIY9I8cUfm+pJs= -go.opentelemetry.io/contrib/detectors/gcp v1.38.0/go.mod h1:SU+iU7nu5ud4oCb3LQOhIZ3nRLj6FNVrKgtflbaf2ts= go.opentelemetry.io/contrib/detectors/gcp v1.40.0 h1:Awaf8gmW99tZTOWqkLCOl6aw1/rxAWVlHsHIZ3fT2sA= go.opentelemetry.io/contrib/detectors/gcp v1.40.0/go.mod h1:99OY9ZCqyLkzJLTh5XhECpLRSxcZl+ZDKBEO+jMBFR4= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= @@ -217,8 +211,6 @@ go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.15.0 h1:W+m0g+/6v3pa5PgVf2xoFMi5YtNR06WtS7ve5pcvLtM= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.15.0/go.mod h1:JM31r0GGZ/GU94mX8hN4D8v6e40aFlUECSQ48HaLgHM= -go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.15.0 h1:EKpiGphOYq3CYnIe2eX9ftUkyU+Y8Dtte8OaWyHJ4+I= -go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.15.0/go.mod h1:nWFP7C+T8TygkTjJ7mAyEaFaE7wNfms3nV/vexZ6qt0= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.16.0 h1:djrxvDxAe44mJUrKataUbOhCKhR3F8QCyWucO16hTQs= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.16.0/go.mod h1:dt3nxpQEiSoKvfTVxp3TUg5fHPLhKtbcnN3Z1I1ePD0= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.39.0 h1:cEf8jF6WbuGQWUVcqgyWtTR0kOOAWY1DYZ+UhvdmQPw= @@ -243,9 +235,8 @@ go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ7 go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= go.opentelemetry.io/otel/sdk/log v0.16.0 h1:e/b4bdlQwC5fnGtG3dlXUrNOnP7c8YLVSpSfEBIkTnI= go.opentelemetry.io/otel/sdk/log v0.16.0/go.mod h1:JKfP3T6ycy7QEuv3Hj8oKDy7KItrEkus8XJE6EoSzw4= 
-go.opentelemetry.io/otel/sdk/log/logtest v0.14.0 h1:Ijbtz+JKXl8T2MngiwqBlPaHqc4YCaP/i13Qrow6gAM= -go.opentelemetry.io/otel/sdk/log/logtest v0.14.0/go.mod h1:dCU8aEL6q+L9cYTqcVOk8rM9Tp8WdnHOPLiBgp0SGOA= go.opentelemetry.io/otel/sdk/log/logtest v0.16.0 h1:/XVkpZ41rVRTP4DfMgYv1nEtNmf65XPPyAdqV90TMy4= +go.opentelemetry.io/otel/sdk/log/logtest v0.16.0/go.mod h1:iOOPgQr5MY9oac/F5W86mXdeyWZGleIx3uXO98X2R6Y= go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw= go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= @@ -257,8 +248,6 @@ go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= golang.org/x/arch v0.11.0 h1:KXV8WWKCXm6tRpLirl2szsO5j/oOODwZf4hATmGVNs4= golang.org/x/arch v0.11.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= -golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -271,21 +260,15 @@ golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= -golang.org/x/net 
v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= -golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= -golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -297,8 +280,6 @@ golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= -golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= 
golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -306,13 +287,11 @@ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= -golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= +golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= +golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/adk v0.5.1-0.20260220130408-ecbe1ba152ed h1:PTGgixN20I1zmA5YWCBX4+3Qzge87ZUnNjHeUsPyimY= -google.golang.org/adk v0.5.1-0.20260220130408-ecbe1ba152ed/go.mod h1:W0RyHt+JXfZHA1VnxeGALRZeqAlp54nv2cw7Sn7M5Jc= google.golang.org/adk v0.5.1-0.20260223191314-02e275dd84a3 h1:kPtfbli/dNIOdJI2I1zCTFa7ZQZqQ0r8Caa7hKywb2I= google.golang.org/adk v0.5.1-0.20260223191314-02e275dd84a3/go.mod h1:KRMCcfekE0mVn4VyjhIzAalbD1bRaB4TfwqP4Hog3qA= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= @@ -322,19 +301,13 @@ google.golang.org/genai v1.40.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5g google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod 
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls= -google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 h1:merA0rdPeUV3YIIfHHcH4qBkiQAc1nfCKSI7lB4cV2M= google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409/go.mod h1:fl8J1IvUjCilwZzQowmw2b7HQB2eAuYBabMXzWurF+I= -google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= -google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 h1:H86B94AW+VfJWDqFeEbBPhEtHzJwJfTbgE2lZa54ZAQ= google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM= -google.golang.org/grpc v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig= google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod 
h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= @@ -348,8 +321,6 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= -google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From ff84f25c4c45204019bf90925616ab717e773baa Mon Sep 17 00:00:00 2001 From: shunjiazhu Date: Wed, 25 Feb 2026 20:23:32 +0800 Subject: [PATCH 6/8] relink veadk spanID and veadk spanID --- observability/plugin.go | 90 +++++++++++++++++++------------------ observability/registry.go | 23 ++++++++++ observability/translator.go | 10 +++++ 3 files changed, 80 insertions(+), 43 deletions(-) diff --git a/observability/plugin.go b/observability/plugin.go index 0de1681..d7bdbc4 100644 --- a/observability/plugin.go +++ b/observability/plugin.go @@ -107,7 +107,6 @@ func (p *adkObservabilityPlugin) isMetricsEnabled() bool { func (p *adkObservabilityPlugin) BeforeRun(ctx agent.InvocationContext) (*genai.Content, error) { log.Debug("Before Run", "InvocationID", ctx.InvocationID(), "SessionID", ctx.Session().ID(), "UserID", ctx.Session().UserID()) // 1. Start the 'invocation' span - ADK doesn't create this yet - // tracer.Start() automatically uses any existing span from context as parent // (e.g. 
spans from HTTP middleware will be the parent) _, span := p.tracer.Start(context.Context(ctx), SpanInvocation, trace.WithSpanKind(trace.SpanKindServer)) _ = ctx.Session().State().Set(stateKeyInvocationSpan, span) @@ -116,6 +115,11 @@ func (p *adkObservabilityPlugin) BeforeRun(ctx agent.InvocationContext) (*genai. setCommonAttributesFromInvocation(ctx, span) setWorkflowAttributes(span) + // 2. Link the adk trace ID to our invocation span + // This makes invoke_agent span's parent point to our invocation span + if adkSpan := trace.SpanFromContext(context.Context(ctx)); adkSpan.SpanContext().IsValid() { + GetRegistry().RegisterInvocationSpanContext(adkSpan.SpanContext().TraceID(), span.SpanContext()) + } // Record start time for metrics meta := &spanMetadata{ StartTime: time.Now(), @@ -229,6 +233,40 @@ func (p *adkObservabilityPlugin) AfterRun(ctx agent.InvocationContext) { } } +// BeforeAgent is called before an agent execution. +// This is the primary trace-bridging point for adk trace -> veadk invocation trace. +// BeforeModel keeps an idempotent bridge as a secondary safety net. 
+func (p *adkObservabilityPlugin) BeforeAgent(ctx agent.CallbackContext) (*genai.Content, error) { + log.Debug("BeforeAgent", + "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) + p.tryBridgeTraceMappingFromCallback(ctx, "BeforeAgent") + return nil, nil +} + +func (p *adkObservabilityPlugin) tryBridgeTraceMappingFromCallback(ctx agent.CallbackContext, stage string) { + adkSC := trace.SpanFromContext(context.Context(ctx)).SpanContext() + veadkInvocationSC, ok := getInvocationSpanContextFromState(ctx.State()) + if !ok { + log.Debug("Skip trace mapping bridge: invocation span missing in state", "stage", stage) + return + } + + if registerTraceMappingIfPossible(GetRegistry(), adkSC, veadkInvocationSC) { + log.Debug("Bridged adk trace to veadk invocation trace", + "stage", stage, + "adk_trace_id", adkSC.TraceID().String(), + "veadk_trace_id", veadkInvocationSC.TraceID().String(), + ) + } +} + +// AfterAgent is called after an agent execution. +func (p *adkObservabilityPlugin) AfterAgent(ctx agent.CallbackContext) (*genai.Content, error) { + log.Debug("AfterAgent", + "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) + return nil, nil +} + // BeforeModel is called before the LLM is called. func (p *adkObservabilityPlugin) BeforeModel(ctx agent.CallbackContext, req *model.LLMRequest) (*model.LLMResponse, error) { log.Debug("BeforeModel", @@ -389,31 +427,16 @@ func (p *adkObservabilityPlugin) AfterTool(ctx tool.Context, t tool.Tool, args m return nil, nil } -// BeforeAgent is called before an agent execution. -// This is the primary trace-bridging point for adk trace -> veadk invocation trace. -// BeforeModel keeps an idempotent bridge as a secondary safety net. 
-func (p *adkObservabilityPlugin) BeforeAgent(ctx agent.CallbackContext) (*genai.Content, error) { - log.Debug("BeforeAgent", - "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) - p.tryBridgeTraceMappingFromCallback(ctx, "BeforeAgent") - return nil, nil -} - -func (p *adkObservabilityPlugin) tryBridgeTraceMappingFromCallback(ctx agent.CallbackContext, stage string) { - adkSC := trace.SpanFromContext(context.Context(ctx)).SpanContext() - veadkInvocationSC, ok := getInvocationSpanContextFromState(ctx.State()) - if !ok { - log.Debug("Skip trace mapping bridge: invocation span missing in state", "stage", stage) - return +func (p *adkObservabilityPlugin) getSpanMetadata(state session.State) *spanMetadata { + val, _ := state.Get(stateKeyMetadata) + if meta, ok := val.(*spanMetadata); ok { + return meta } + return &spanMetadata{} +} - if registerTraceMappingIfPossible(GetRegistry(), adkSC, veadkInvocationSC) { - log.Debug("Bridged adk trace to veadk invocation trace", - "stage", stage, - "adk_trace_id", adkSC.TraceID().String(), - "veadk_trace_id", veadkInvocationSC.TraceID().String(), - ) - } +func (p *adkObservabilityPlugin) storeSpanMetadata(state session.State, meta *spanMetadata) { + _ = state.Set(stateKeyMetadata, meta) } func registerTraceMappingIfPossible(registry *TraceRegistry, adkSC, veadkSC trace.SpanContext) bool { @@ -436,25 +459,6 @@ func getInvocationSpanContextFromState(state session.State) (trace.SpanContext, return trace.SpanContext{}, false } -// AfterAgent is called after an agent execution. 
-func (p *adkObservabilityPlugin) AfterAgent(ctx agent.CallbackContext) (*genai.Content, error) { - log.Debug("AfterAgent", - "InvocationID", ctx.InvocationID(), "SessionID", ctx.SessionID(), "UserID", ctx.UserID(), "AgentName", ctx.AgentName(), "AppName", ctx.AppName()) - return nil, nil -} - -func (p *adkObservabilityPlugin) getSpanMetadata(state session.State) *spanMetadata { - val, _ := state.Get(stateKeyMetadata) - if meta, ok := val.(*spanMetadata); ok { - return meta - } - return &spanMetadata{} -} - -func (p *adkObservabilityPlugin) storeSpanMetadata(state session.State, meta *spanMetadata) { - _ = state.Set(stateKeyMetadata, meta) -} - const ( stateKeyInvocationSpan = "veadk.observability.invocation_span" diff --git a/observability/registry.go b/observability/registry.go index 353fc91..a377f3a 100644 --- a/observability/registry.go +++ b/observability/registry.go @@ -62,6 +62,7 @@ type toolCallInfo struct { type traceInfos struct { veadkTraceID trace.TraceID + invocationSC trace.SpanContext toolCallIDs []string } @@ -260,3 +261,25 @@ func (r *TraceRegistry) EndAllInvocationSpans() { return true }) } + +// RegisterInvocationSpanContext links an adk TraceID to a VeADK invocation span context. +// This allows us to set the invoke_agent span's parent to our invocation span in the translator. +func (r *TraceRegistry) RegisterInvocationSpanContext(adkTraceID trace.TraceID, invocationSC trace.SpanContext) { + if !adkTraceID.IsValid() || !invocationSC.IsValid() { + return + } + res := r.getOrCreateTraceInfos(adkTraceID) + r.resourcesMu.Lock() + defer r.resourcesMu.Unlock() + res.invocationSC = invocationSC +} + +// GetInvocationSpanContext gets the VeADK invocation span context for an adk TraceID. 
+func (r *TraceRegistry) GetInvocationSpanContext(adkTraceID trace.TraceID) (trace.SpanContext, bool) { + r.resourcesMu.RLock() + defer r.resourcesMu.RUnlock() + if res, ok := r.adkTraceToVeadkTraceMap[adkTraceID]; ok && res.invocationSC.IsValid() { + return res.invocationSC, true + } + return trace.SpanContext{}, false +} diff --git a/observability/translator.go b/observability/translator.go index 154ceae..f01b67d 100644 --- a/observability/translator.go +++ b/observability/translator.go @@ -444,11 +444,21 @@ func (p *translatedSpan) Parent() oteltrace.SpanContext { parent := p.ReadOnlySpan.Parent() registry := GetRegistry() + // 1. Check if this is an invoke_agent span - link to our invocation span if available + if classifyTranslatedSpanKind(p.ReadOnlySpan.Name()) == translatedSpanAgent { + adkTraceID := p.ReadOnlySpan.SpanContext().TraceID() + if invocationSC, ok := registry.GetInvocationSpanContext(adkTraceID); ok { + return invocationSC + } + } + + // 2. Check for tool call ID mapping toolCallID := findToolCallID(p.ReadOnlySpan.Attributes()) if remapped, ok := tryParentByToolCallID(registry, toolCallID); ok { return remapped } + // 3. 
Check for trace ID mapping if remapped, ok := tryParentByTraceID(registry, parent); ok { return remapped } From 3103622dce415e79c057daf87d817bbd2bb7c8e0 Mon Sep 17 00:00:00 2001 From: shunjiazhu Date: Wed, 25 Feb 2026 21:00:58 +0800 Subject: [PATCH 7/8] add gen_ai.response.model since adk-go removed it --- observability/translator.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/observability/translator.go b/observability/translator.go index f01b67d..fc6e51b 100644 --- a/observability/translator.go +++ b/observability/translator.go @@ -130,6 +130,15 @@ func (p *translatedSpan) Attributes() []attribute.KeyValue { newAttrs = p.appendToolReconstructedAttributes(kind, newAttrs, raw) newAttrs = appendToolSpanKindAttribute(newAttrs, raw) + // If it's an LLM span and has request model but no response model, set response model to request model + if kind == translatedSpanLLM { + reqModel := getStringAttrFromList(newAttrs, AttrGenAIRequestModel, "") + respModel := getStringAttrFromList(newAttrs, AttrGenAIResponseModel, "") + if reqModel != "" && respModel == "" { + newAttrs = append(newAttrs, attribute.String(AttrGenAIResponseModel, reqModel)) + } + } + return newAttrs } @@ -496,3 +505,15 @@ func (p *translatedSpan) InstrumentationScope() instrumentation.Scope { func (p *translatedSpan) InstrumentationLibrary() instrumentation.Scope { return p.InstrumentationScope() } + +func getStringAttrFromList(attrs []attribute.KeyValue, key, fallback string) string { + for _, kv := range attrs { + if string(kv.Key) == key { + v := kv.Value.AsString() + if v != "" { + return v + } + } + } + return fallback +} From 0bf3d537917561112d459d97d92e129358e29de9 Mon Sep 17 00:00:00 2001 From: shunjiazhu Date: Wed, 25 Feb 2026 22:25:09 +0800 Subject: [PATCH 8/8] add total token for generate_content span --- observability/translator.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/observability/translator.go b/observability/translator.go 
index fc6e51b..0bc4d98 100644 --- a/observability/translator.go +++ b/observability/translator.go @@ -137,6 +137,13 @@ func (p *translatedSpan) Attributes() []attribute.KeyValue { if reqModel != "" && respModel == "" { newAttrs = append(newAttrs, attribute.String(AttrGenAIResponseModel, reqModel)) } + // Calculate total tokens if input and output tokens are present but total is missing + inputTokens := getIntAttrFromList(newAttrs, AttrGenAIUsageInputTokens, 0) + outputTokens := getIntAttrFromList(newAttrs, AttrGenAIUsageOutputTokens, 0) + hasTotal := hasAttrFromList(newAttrs, AttrGenAIUsageTotalTokens) + if inputTokens > 0 && outputTokens > 0 && !hasTotal { + newAttrs = append(newAttrs, attribute.Int64(AttrGenAIUsageTotalTokens, inputTokens+outputTokens)) + } } return newAttrs @@ -517,3 +524,21 @@ func getStringAttrFromList(attrs []attribute.KeyValue, key, fallback string) str } return fallback } + +func getIntAttrFromList(attrs []attribute.KeyValue, key string, fallback int64) int64 { + for _, kv := range attrs { + if string(kv.Key) == key { + return kv.Value.AsInt64() + } + } + return fallback +} + +func hasAttrFromList(attrs []attribute.KeyValue, key string) bool { + for _, kv := range attrs { + if string(kv.Key) == key { + return true + } + } + return false +}