From 35adf1ad2fb2cfb09edbba9f45d438621db49dd5 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:15 +0100 Subject: [PATCH 01/23] implement valkey using glide valkey SDK --- .gitignore | 1 + backend/valkey/activity.go | 85 +++++ backend/valkey/delete.go | 33 ++ backend/valkey/diagnostics.go | 114 ++++++ backend/valkey/events.go | 30 ++ backend/valkey/events_future.go | 27 ++ backend/valkey/expire.go | 38 ++ backend/valkey/instance.go | 249 +++++++++++++ backend/valkey/keys.go | 73 ++++ backend/valkey/options.go | 59 +++ backend/valkey/queue.go | 284 +++++++++++++++ .../scripts/cancel_workflow_instance.lua | 23 ++ .../valkey/scripts/complete_activity_task.lua | 43 +++ .../valkey/scripts/complete_workflow_task.lua | 234 ++++++++++++ .../scripts/create_workflow_instance.lua | 65 ++++ backend/valkey/scripts/delete_instance.lua | 14 + .../scripts/expire_workflow_instance.lua | 29 ++ backend/valkey/scripts/queue/complete.lua | 21 ++ backend/valkey/scripts/queue/enqueue.lua | 13 + backend/valkey/scripts/queue/prepare.lua | 30 ++ backend/valkey/scripts/queue/recover.lua | 16 + backend/valkey/scripts/queue/size.lua | 13 + .../valkey/scripts/schedule_future_events.lua | 39 ++ backend/valkey/scripts/signal_workflow.lua | 35 ++ backend/valkey/signal.go | 61 ++++ backend/valkey/stats.go | 38 ++ backend/valkey/valkey.go | 140 ++++++++ backend/valkey/workflow.go | 335 ++++++++++++++++++ go.mod | 2 + go.sum | 6 +- 30 files changed, 2148 insertions(+), 2 deletions(-) create mode 100644 backend/valkey/activity.go create mode 100644 backend/valkey/delete.go create mode 100644 backend/valkey/diagnostics.go create mode 100644 backend/valkey/events.go create mode 100644 backend/valkey/events_future.go create mode 100644 backend/valkey/expire.go create mode 100644 backend/valkey/instance.go create mode 100644 backend/valkey/keys.go create mode 100644 backend/valkey/options.go create mode 100644 backend/valkey/queue.go create mode 100644 backend/valkey/scripts/cancel_workflow_instance.lua create mode 100644 backend/valkey/scripts/complete_activity_task.lua create mode 100644 backend/valkey/scripts/complete_workflow_task.lua create mode 100644 backend/valkey/scripts/create_workflow_instance.lua create mode 100644 backend/valkey/scripts/delete_instance.lua create mode 100644 backend/valkey/scripts/expire_workflow_instance.lua create mode 100644 backend/valkey/scripts/queue/complete.lua create mode 100644 backend/valkey/scripts/queue/enqueue.lua create mode 100644 backend/valkey/scripts/queue/prepare.lua create mode 100644 backend/valkey/scripts/queue/recover.lua create mode 100644 backend/valkey/scripts/queue/size.lua create mode 100644 backend/valkey/scripts/schedule_future_events.lua create mode 100644 backend/valkey/scripts/signal_workflow.lua create mode 100644 backend/valkey/signal.go create mode 100644 backend/valkey/stats.go create mode 100644 backend/valkey/valkey.go create mode 100644 backend/valkey/workflow.go diff --git a/.gitignore b/.gitignore index 4f21a677..17f7e9d3 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ custom-gcl # Docs build artifacts docs/build/ +.aider* diff --git a/backend/valkey/activity.go b/backend/valkey/activity.go new file mode 100644 index 00000000..733a5123 --- /dev/null +++ b/backend/valkey/activity.go @@ -0,0 +1,85 @@ +package valkey + +import ( + "context" + "fmt" + + "github.com/cschleiden/go-workflows/backend" + "github.com/cschleiden/go-workflows/backend/history" + "github.com/cschleiden/go-workflows/workflow" + 
"github.com/valkey-io/valkey-glide/go/v2/options" +) + +func (vb *valkeyBackend) PrepareActivityQueues(ctx context.Context, queues []workflow.Queue) error { + return vb.activityQueue.Prepare(ctx, vb.client, queues) +} + +func (vb *valkeyBackend) GetActivityTask(ctx context.Context, queues []workflow.Queue) (*backend.ActivityTask, error) { + activityTask, err := vb.activityQueue.Dequeue(ctx, vb.client, queues, vb.options.ActivityLockTimeout, vb.options.BlockTimeout) + if err != nil { + return nil, err + } + + if activityTask == nil { + return nil, nil + } + + return &backend.ActivityTask{ + WorkflowInstance: activityTask.Data.Instance, + Queue: workflow.Queue(activityTask.Data.Queue), + ID: activityTask.TaskID, + ActivityID: activityTask.Data.ID, + Event: activityTask.Data.Event, + }, nil +} + +func (vb *valkeyBackend) ExtendActivityTask(ctx context.Context, task *backend.ActivityTask) error { + if err := vb.activityQueue.Extend(ctx, vb.client, task.Queue, task.ID); err != nil { + return err + } + + return nil +} + +func (vb *valkeyBackend) CompleteActivityTask(ctx context.Context, task *backend.ActivityTask, result *history.Event) error { + instanceState, err := readInstance(ctx, vb.client, vb.keys.instanceKey(task.WorkflowInstance)) + if err != nil { + return err + } + + // Marshal event data + eventData, payload, err := marshalEvent(result) + if err != nil { + return err + } + + activityQueueKeys := vb.activityQueue.Keys(task.Queue) + workflowQueueKeys := vb.workflowQueue.Keys(workflow.Queue(instanceState.Queue)) + + _, err = vb.client.InvokeScriptWithOptions(ctx, completeActivityTaskScript, options.ScriptOptions{ + Keys: []string{ + activityQueueKeys.SetKey, + activityQueueKeys.StreamKey, + vb.keys.pendingEventsKey(task.WorkflowInstance), + vb.keys.payloadKey(task.WorkflowInstance), + vb.workflowQueue.queueSetKey, + workflowQueueKeys.SetKey, + workflowQueueKeys.StreamKey, + }, + Args: []string{ + task.ID, + vb.activityQueue.groupName, + result.ID, + eventData, + payload, + vb.workflowQueue.groupName, + instanceSegment(task.WorkflowInstance), + }, + }) + + if err != nil { + return fmt.Errorf("completing activity task: %w", err) + } + + return nil +} diff --git a/backend/valkey/delete.go b/backend/valkey/delete.go new file mode 100644 index 00000000..4cdbbb3a --- /dev/null +++ b/backend/valkey/delete.go @@ -0,0 +1,33 @@ +package valkey + +import ( + "context" + "fmt" + + "github.com/cschleiden/go-workflows/core" + "github.com/valkey-io/valkey-glide/go/v2/options" +) + +// deleteInstance deletes an instance from Valkey. It does not attempt to remove any future events or pending +// workflow tasks. It's assumed that the instance is in the finished state. +// +// Note: might want to revisit this in the future if we want to support removing hung instances. 
+func (vb *valkeyBackend) deleteInstance(ctx context.Context, instance *core.WorkflowInstance) error { + _, err := vb.client.InvokeScriptWithOptions(ctx, deleteInstanceScript, options.ScriptOptions{ + Keys: []string{ + vb.keys.instanceKey(instance), + vb.keys.pendingEventsKey(instance), + vb.keys.historyKey(instance), + vb.keys.payloadKey(instance), + vb.keys.activeInstanceExecutionKey(instance.InstanceID), + vb.keys.instancesByCreation(), + }, + Args: []string{instanceSegment(instance)}, + }) + + if err != nil { + return fmt.Errorf("failed to delete instance: %w", err) + } + + return nil +} diff --git a/backend/valkey/diagnostics.go b/backend/valkey/diagnostics.go new file mode 100644 index 00000000..6fc0b575 --- /dev/null +++ b/backend/valkey/diagnostics.go @@ -0,0 +1,114 @@ +package valkey + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/cschleiden/go-workflows/core" + "github.com/cschleiden/go-workflows/diag" + "github.com/cschleiden/go-workflows/internal/log" + "github.com/valkey-io/valkey-glide/go/v2/constants" + "github.com/valkey-io/valkey-glide/go/v2/options" +) + +var _ diag.Backend = (*valkeyBackend)(nil) + +func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstanceID, afterExecutionID string, count int) ([]*diag.WorkflowInstanceRef, error) { + start := options.NewInclusiveScoreBoundary(0) + end := options.NewInfiniteScoreBoundary(constants.PositiveInfinity) + + zrangeInput := &options.RangeByScore{ + Start: start, + End: end, + Reverse: true, + Limit: &options.Limit{ + Offset: 0, + Count: int64(count), + }, + } + + if afterInstanceID != "" { + afterSegmentID := instanceSegment(core.NewWorkflowInstance(afterInstanceID, afterExecutionID)) + scores, err := vb.client.ZMScore(ctx, vb.keys.instancesByCreation(), []string{afterSegmentID}) + if err != nil { + return nil, fmt.Errorf("getting instance score for %v: %w", afterSegmentID, err) + } + + if len(scores) == 0 { + vb.Options().Logger.Error("could not find instance %v", + log.NamespaceKey+".valkey.afterInstanceID", afterInstanceID, + log.NamespaceKey+".valkey.afterExecutionID", afterExecutionID, + ) + return nil, nil + } + + end := options.NewScoreBoundary(scores[0].Value(), false) + zrangeInput.End = end + } + + instanceSegments, err := vb.client.ZRange(ctx, vb.keys.instancesByCreation(), zrangeInput) + if err != nil { + return nil, fmt.Errorf("getting instances: %w", err) + } + + if len(instanceSegments) == 0 { + return nil, nil + } + + instanceKeys := make([]string, 0) + for _, r := range instanceSegments { + instanceKeys = append(instanceKeys, vb.keys.instanceKeyFromSegment(r)) + } + + instances, err := vb.client.MGet(ctx, instanceKeys) + if err != nil { + return nil, fmt.Errorf("getting instances: %w", err) + } + + instanceRefs := make([]*diag.WorkflowInstanceRef, 0, len(instances)) + for _, instance := range instances { + if instance.IsNil() { + continue + } + + var state instanceState + if err := json.Unmarshal([]byte(instance.Value()), &state); err != nil { + return nil, fmt.Errorf("unmarshaling instance state: %w", err) + } + + instanceRefs = append(instanceRefs, &diag.WorkflowInstanceRef{ + Instance: state.Instance, + CreatedAt: state.CreatedAt, + CompletedAt: state.CompletedAt, + State: state.State, + Queue: state.Queue, + }) + } + + return instanceRefs, nil +} + +func (vb *valkeyBackend) GetWorkflowInstance(ctx context.Context, instance *core.WorkflowInstance) (*diag.WorkflowInstanceRef, error) { + instanceState, err := readInstance(ctx, vb.client, vb.keys.instanceKey(instance)) + 
if err != nil { + return nil, err + } + + return mapWorkflowInstance(instanceState), nil +} + +func (vb *valkeyBackend) GetWorkflowTree(ctx context.Context, instance *core.WorkflowInstance) (*diag.WorkflowInstanceTree, error) { + itb := diag.NewInstanceTreeBuilder(vb) + return itb.BuildWorkflowInstanceTree(ctx, instance) +} + +func mapWorkflowInstance(instance *instanceState) *diag.WorkflowInstanceRef { + return &diag.WorkflowInstanceRef{ + Instance: instance.Instance, + CreatedAt: instance.CreatedAt, + CompletedAt: instance.CompletedAt, + State: instance.State, + Queue: instance.Queue, + } +} diff --git a/backend/valkey/events.go b/backend/valkey/events.go new file mode 100644 index 00000000..d34dae08 --- /dev/null +++ b/backend/valkey/events.go @@ -0,0 +1,30 @@ +package valkey + +import ( + "encoding/json" + + "github.com/cschleiden/go-workflows/backend/history" +) + +type eventWithoutAttributes struct { + *history.Event +} + +func (e *eventWithoutAttributes) MarshalJSON() ([]byte, error) { + return json.Marshal(&struct { + *history.Event + Attributes interface{} `json:"attr"` + }{ + Event: e.Event, + Attributes: nil, + }) +} + +func marshalEventWithoutAttributes(event *history.Event) (string, error) { + data, err := json.Marshal(&eventWithoutAttributes{event}) + if err != nil { + return "", err + } + + return string(data), nil +} diff --git a/backend/valkey/events_future.go b/backend/valkey/events_future.go new file mode 100644 index 00000000..1dd5ab95 --- /dev/null +++ b/backend/valkey/events_future.go @@ -0,0 +1,27 @@ +package valkey + +import ( + "context" + "fmt" + "strconv" + "time" + + "github.com/valkey-io/valkey-glide/go/v2/options" +) + +func scheduleFutureEvents(ctx context.Context, vb *valkeyBackend) error { + now := time.Now().UnixMilli() + nowStr := strconv.FormatInt(now, 10) + _, err := vb.client.InvokeScriptWithOptions(ctx, futureEventsScript, options.ScriptOptions{ + Keys: []string{ + vb.keys.futureEventsKey(), + }, + Args: []string{nowStr, vb.keys.prefix}, + }) + + if err != nil { + return fmt.Errorf("checking future events: %w", err) + } + + return nil +} diff --git a/backend/valkey/expire.go b/backend/valkey/expire.go new file mode 100644 index 00000000..ce167caf --- /dev/null +++ b/backend/valkey/expire.go @@ -0,0 +1,38 @@ +package valkey + +import ( + "context" + "fmt" + "strconv" + "time" + + "github.com/cschleiden/go-workflows/core" + "github.com/valkey-io/valkey-glide/go/v2/options" +) + +func (vb *valkeyBackend) setWorkflowInstanceExpiration(ctx context.Context, instance *core.WorkflowInstance, expiration time.Duration) error { + now := time.Now().UnixMilli() + nowStr := strconv.FormatInt(now, 10) + + exp := time.Now().Add(expiration).UnixMilli() + expStr := strconv.FormatInt(exp, 10) + + _, err := vb.client.InvokeScriptWithOptions(ctx, expireWorkflowInstanceScript, options.ScriptOptions{ + Keys: []string{ + vb.keys.instancesByCreation(), + vb.keys.instancesExpiring(), + vb.keys.instanceKey(instance), + vb.keys.pendingEventsKey(instance), + vb.keys.historyKey(instance), + vb.keys.payloadKey(instance), + }, + Args: []string{ + nowStr, + fmt.Sprintf("%.0f", expiration.Seconds()), + expStr, + instanceSegment(instance), + }, + }) + + return err +} diff --git a/backend/valkey/instance.go b/backend/valkey/instance.go new file mode 100644 index 00000000..e816e737 --- /dev/null +++ b/backend/valkey/instance.go @@ -0,0 +1,249 @@ +package valkey + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "time" + + "github.com/cschleiden/go-workflows/backend" + 
"github.com/cschleiden/go-workflows/backend/history" + "github.com/cschleiden/go-workflows/backend/metadata" + "github.com/cschleiden/go-workflows/core" + "github.com/cschleiden/go-workflows/workflow" + "github.com/valkey-io/valkey-glide/go/v2" + "github.com/valkey-io/valkey-glide/go/v2/constants" + "github.com/valkey-io/valkey-glide/go/v2/options" +) + +func (vb *valkeyBackend) CreateWorkflowInstance(ctx context.Context, instance *workflow.Instance, event *history.Event) error { + a := event.Attributes.(*history.ExecutionStartedAttributes) + + instanceState, err := json.Marshal(&instanceState{ + Queue: string(a.Queue), + Instance: instance, + State: core.WorkflowInstanceStateActive, + Metadata: a.Metadata, + CreatedAt: time.Now(), + }) + if err != nil { + return fmt.Errorf("marshaling instance state: %w", err) + } + + activeInstance, err := json.Marshal(instance) + if err != nil { + return fmt.Errorf("marshaling instance: %w", err) + } + + eventData, payloadData, err := marshalEvent(event) + if err != nil { + return err + } + + keyInfo := vb.workflowQueue.Keys(a.Queue) + + // Execute Lua script for atomic creation + result, err := vb.client.InvokeScriptWithOptions(ctx, createWorkflowInstanceScript, options.ScriptOptions{ + Keys: []string{ + vb.keys.instanceKey(instance), + vb.keys.activeInstanceExecutionKey(instance.InstanceID), + vb.keys.pendingEventsKey(instance), + vb.keys.payloadKey(instance), + vb.keys.instancesActive(), + vb.keys.instancesByCreation(), + keyInfo.SetKey, + keyInfo.StreamKey, + vb.workflowQueue.queueSetKey, + }, + Args: []string{ + instanceSegment(instance), + string(instanceState), + string(activeInstance), + event.ID, + eventData, + payloadData, + fmt.Sprintf("%d", time.Now().UTC().UnixNano()), + }, + }) + + if err != nil { + if err.Error() == "ERR InstanceAlreadyExists" { + return backend.ErrInstanceAlreadyExists + } + return fmt.Errorf("creating workflow instance: %w", err) + } + + if result == nil { + return fmt.Errorf("unexpected nil result from create workflow instance script") + } + + return nil +} + +func (vb *valkeyBackend) GetWorkflowInstanceHistory(ctx context.Context, instance *core.WorkflowInstance, lastSequenceID *int64) ([]*history.Event, error) { + boundary := options.NewInfiniteStreamBoundary(constants.NegativeInfinity) + if lastSequenceID != nil { + boundary = options.NewStreamBoundary(strconv.FormatInt(*lastSequenceID, 10), false) + } + + msgs, err := vb.client.XRange(ctx, vb.keys.historyKey(instance), boundary, "+") + if err != nil { + return nil, err + } + + payloadKeys := make([]string, 0, len(msgs)) + events := make([]*history.Event, 0, len(msgs)) + for _, msg := range msgs { + var eventStr string + for _, field := range msg.Fields { + if field.Field == "event" { + eventStr = field.Value + break + } + } + if eventStr == "" { + continue + } + + var event *history.Event + if err := json.Unmarshal([]byte(eventStr), &event); err != nil { + return nil, fmt.Errorf("unmarshaling event: %w", err) + } + + payloadKeys = append(payloadKeys, event.ID) + events = append(events, event) + } + + if len(payloadKeys) > 0 { + res, err := vb.client.HMGet(ctx, vb.keys.payloadKey(instance), payloadKeys) + if err != nil { + return nil, fmt.Errorf("reading payloads: %w", err) + } + + for i, event := range events { + event.Attributes, err = history.DeserializeAttributes(event.Type, []byte(res[i].Value())) + if err != nil { + return nil, fmt.Errorf("deserializing attributes for event %v: %w", event.Type, err) + } + } + } + + return events, nil +} + +func (vb 
*valkeyBackend) GetWorkflowInstanceState(ctx context.Context, instance *core.WorkflowInstance) (core.WorkflowInstanceState, error) { + instanceState, err := readInstance(ctx, vb.client, vb.keys.instanceKey(instance)) + if err != nil { + return core.WorkflowInstanceStateActive, err + } + + return instanceState.State, nil +} + +func (vb *valkeyBackend) CancelWorkflowInstance(ctx context.Context, instance *core.WorkflowInstance, event *history.Event) error { + // Read the instance to check if it exists + instanceState, err := readInstance(ctx, vb.client, vb.keys.instanceKey(instance)) + if err != nil { + return err + } + + // Prepare event data + eventData, payloadData, err := marshalEvent(event) + if err != nil { + return err + } + + keyInfo := vb.workflowQueue.Keys(workflow.Queue(instanceState.Queue)) + + // Cancel instance + _, err = vb.client.InvokeScriptWithOptions(ctx, cancelWorkflowInstanceScript, options.ScriptOptions{ + Keys: []string{ + vb.keys.payloadKey(instance), + vb.keys.pendingEventsKey(instance), + keyInfo.SetKey, + keyInfo.StreamKey, + }, + Args: []string{ + event.ID, + eventData, + payloadData, + instanceSegment(instance), + }, + }) + + if err != nil { + return fmt.Errorf("canceling workflow instance: %w", err) + } + + return nil +} + +func (vb *valkeyBackend) RemoveWorkflowInstance(ctx context.Context, instance *core.WorkflowInstance) error { + i, err := readInstance(ctx, vb.client, vb.keys.instanceKey(instance)) + if err != nil { + return err + } + + if i.State != core.WorkflowInstanceStateFinished && i.State != core.WorkflowInstanceStateContinuedAsNew { + return backend.ErrInstanceNotFinished + } + + return vb.deleteInstance(ctx, instance) +} + +func (vb *valkeyBackend) RemoveWorkflowInstances(ctx context.Context, options ...backend.RemovalOption) error { + return backend.ErrNotSupported{ + Message: "not supported, use auto-expiration", + } +} + +type instanceState struct { + Queue string `json:"queue"` + + Instance *core.WorkflowInstance `json:"instance,omitempty"` + State core.WorkflowInstanceState `json:"state,omitempty"` + + Metadata *metadata.WorkflowMetadata `json:"metadata,omitempty"` + + CreatedAt time.Time `json:"created_at,omitempty"` + CompletedAt *time.Time `json:"completed_at,omitempty"` + + LastSequenceID int64 `json:"last_sequence_id,omitempty"` +} + +func readInstance(ctx context.Context, client glide.Client, instanceKey string) (*instanceState, error) { + val, err := client.Get(ctx, instanceKey) + if err != nil { + return nil, fmt.Errorf("reading instance: %w", err) + } + + if val.IsNil() { + return nil, backend.ErrInstanceNotFound + } + + var state instanceState + if err := json.Unmarshal([]byte(val.Value()), &state); err != nil { + return nil, fmt.Errorf("unmarshaling instance state: %w", err) + } + + return &state, nil +} + +func (vb *valkeyBackend) readActiveInstanceExecution(ctx context.Context, instanceID string) (*core.WorkflowInstance, error) { + val, err := vb.client.Get(ctx, vb.keys.activeInstanceExecutionKey(instanceID)) + if err != nil { + return nil, err + } + + if val.IsNil() { + return nil, nil + } + + var instance *core.WorkflowInstance + if err := json.Unmarshal([]byte(val.Value()), &instance); err != nil { + return nil, fmt.Errorf("unmarshaling instance: %w", err) + } + + return instance, nil +} diff --git a/backend/valkey/keys.go b/backend/valkey/keys.go new file mode 100644 index 00000000..30abeb9f --- /dev/null +++ b/backend/valkey/keys.go @@ -0,0 +1,73 @@ +package valkey + +import ( + "fmt" + + 
"github.com/cschleiden/go-workflows/core" +) + +type keys struct { + // Ensure prefix ends with `:` + prefix string +} + +func newKeys(prefix string) *keys { + if prefix != "" && prefix[len(prefix)-1] != ':' { + prefix += ":" + } + + return &keys{prefix: prefix} +} + +// activeInstanceExecutionKey returns the key for the latest execution of the given instance +func (k *keys) activeInstanceExecutionKey(instanceID string) string { + return fmt.Sprintf("%sactive-instance-execution:%v", k.prefix, instanceID) +} + +func instanceSegment(instance *core.WorkflowInstance) string { + return fmt.Sprintf("%v:%v", instance.InstanceID, instance.ExecutionID) +} + +func (k *keys) instanceKey(instance *core.WorkflowInstance) string { + return k.instanceKeyFromSegment(instanceSegment(instance)) +} + +func (k *keys) instanceKeyFromSegment(segment string) string { + return fmt.Sprintf("%sinstance:%v", k.prefix, segment) +} + +// instancesByCreation returns the key for the ZSET that contains all instances sorted by creation date. The score is the +// creation time as a unix timestamp. Used for listing all workflow instances in the diagnostics UI. +func (k *keys) instancesByCreation() string { + return fmt.Sprintf("%sinstances-by-creation", k.prefix) +} + +// instancesActive returns the key for the SET that contains all active instances. Used for reporting active workflow +// instances in stats. +func (k *keys) instancesActive() string { + return fmt.Sprintf("%sinstances-active", k.prefix) +} + +func (k *keys) instancesExpiring() string { + return fmt.Sprintf("%sinstances-expiring", k.prefix) +} + +func (k *keys) pendingEventsKey(instance *core.WorkflowInstance) string { + return fmt.Sprintf("%spending-events:%v", k.prefix, instanceSegment(instance)) +} + +func (k *keys) historyKey(instance *core.WorkflowInstance) string { + return fmt.Sprintf("%shistory:%v", k.prefix, instanceSegment(instance)) +} + +func (k *keys) futureEventsKey() string { + return fmt.Sprintf("%sfuture-events", k.prefix) +} + +func (k *keys) futureEventKey(instance *core.WorkflowInstance, scheduleEventID int64) string { + return fmt.Sprintf("%sfuture-event:%v:%v", k.prefix, instanceSegment(instance), scheduleEventID) +} + +func (k *keys) payloadKey(instance *core.WorkflowInstance) string { + return fmt.Sprintf("%spayload:%v", k.prefix, instanceSegment(instance)) +} diff --git a/backend/valkey/options.go b/backend/valkey/options.go new file mode 100644 index 00000000..b3a914c5 --- /dev/null +++ b/backend/valkey/options.go @@ -0,0 +1,59 @@ +package valkey + +import ( + "time" + + "github.com/cschleiden/go-workflows/backend" +) + +type ValkeyOptions struct { + *backend.Options + + BlockTimeout time.Duration + + AutoExpiration time.Duration + AutoExpirationContinueAsNew time.Duration + + KeyPrefix string +} + +type ValkeyBackendOption func(*ValkeyOptions) + +// WithKeyPrefix sets the prefix for all keys used in the Valkey backend. +func WithKeyPrefix(prefix string) ValkeyBackendOption { + return func(o *ValkeyOptions) { + o.KeyPrefix = prefix + } +} + +// WithBlockTimeout sets the timeout for blocking operations like dequeuing a workflow or activity task +func WithBlockTimeout(timeout time.Duration) ValkeyBackendOption { + return func(o *ValkeyOptions) { + o.BlockTimeout = timeout + } +} + +// WithAutoExpiration sets the duration after which finished runs will expire from the data store. +// If set to 0 (default), runs will never expire and need to be manually removed. 
+func WithAutoExpiration(expireFinishedRunsAfter time.Duration) ValkeyBackendOption { + return func(o *ValkeyOptions) { + o.AutoExpiration = expireFinishedRunsAfter + } +} + +// WithAutoExpirationContinueAsNew sets the duration after which runs that were completed with `ContinueAsNew` +// automatically expire. +// If set to 0 (default), the overall expiration setting set with `WithAutoExpiration` will be used. +func WithAutoExpirationContinueAsNew(expireContinuedAsNewRunsAfter time.Duration) ValkeyBackendOption { + return func(o *ValkeyOptions) { + o.AutoExpirationContinueAsNew = expireContinuedAsNewRunsAfter + } +} + +func WithBackendOptions(opts ...backend.BackendOption) ValkeyBackendOption { + return func(o *ValkeyOptions) { + for _, opt := range opts { + opt(o.Options) + } + } +} diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go new file mode 100644 index 00000000..63356e39 --- /dev/null +++ b/backend/valkey/queue.go @@ -0,0 +1,284 @@ +package valkey + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + "github.com/cschleiden/go-workflows/workflow" + "github.com/google/uuid" + "github.com/valkey-io/valkey-glide/go/v2" + "github.com/valkey-io/valkey-glide/go/v2/models" + "github.com/valkey-io/valkey-glide/go/v2/options" +) + +type taskQueue[T any] struct { + keyPrefix string + tasktype string + groupName string + workerName string + queueSetKey string +} + +type TaskItem[T any] struct { + // TaskID is the generated ID of the task item + TaskID string + + // ID is the provided id + ID string + + // Optional data stored with a task, needs to be serializable + Data T +} + +type KeyInfo struct { + StreamKey string + SetKey string +} + +func newTaskQueue[T any](ctx context.Context, client glide.Client, keyPrefix string, tasktype string, workerName string) (*taskQueue[T], error) { + // Ensure the key prefix ends with a colon + if keyPrefix != "" && keyPrefix[len(keyPrefix)-1] != ':' { + keyPrefix += ":" + } + + // Use provided worker name or generate UUID if empty + if workerName == "" { + workerName = uuid.NewString() + } + + tq := &taskQueue[T]{ + keyPrefix: keyPrefix, + tasktype: tasktype, + groupName: "task-workers", + workerName: workerName, + queueSetKey: fmt.Sprintf("%s%s:queues", keyPrefix, tasktype), + } + + return tq, nil +} + +func (q *taskQueue[T]) Prepare(ctx context.Context, client glide.Client, queues []workflow.Queue) error { + for _, queue := range queues { + streamKey := q.Keys(queue).StreamKey + groupName := q.groupName + + // Try to create consumer group + _, err := client.XGroupCreateWithOptions(ctx, streamKey, groupName, "0", options.XGroupCreateOptions{MkStream: true}) + if err != nil { + // Group might already exist, which is fine + if !strings.Contains(err.Error(), "BUSYGROUP") { + return fmt.Errorf("preparing queue %s: %w", queue, err) + } + } + } + + return nil +} + +func (q *taskQueue[T]) Keys(queue workflow.Queue) KeyInfo { + return KeyInfo{ + StreamKey: fmt.Sprintf("%stask-stream:%s:%s", q.keyPrefix, queue, q.tasktype), + SetKey: fmt.Sprintf("%stask-set:%s:%s", q.keyPrefix, queue, q.tasktype), + } +} + +func (q *taskQueue[T]) Size(ctx context.Context, client glide.Client) (map[workflow.Queue]int64, error) { + members, err := client.SMembers(ctx, q.queueSetKey) + if err != nil { + return nil, fmt.Errorf("getting queue size: %w", err) + } + + res := map[workflow.Queue]int64{} + + for queueSetKey := range members { + size, err := client.SCard(ctx, queueSetKey) + if err != nil { + return nil, fmt.Errorf("getting queue size: %w", err) 
+ } + + // Parse queue name from key + queueName := strings.TrimPrefix(queueSetKey, q.keyPrefix) + queueName = strings.Split(queueName, ":")[1] // queue name is the third part of the key (0-indexed) + + queue := workflow.Queue(queueName) + res[queue] = size + } + + return res, nil +} + +func (q *taskQueue[T]) Enqueue(ctx context.Context, client glide.Client, queue workflow.Queue, id string, data *T) error { + ds, err := json.Marshal(data) + if err != nil { + return err + } + + keys := q.Keys(queue) + + // Add to set to track uniqueness + _, err = client.SAdd(ctx, q.queueSetKey, []string{keys.SetKey}) + if err != nil { + return err + } + + // Add to set for this queue + added, err := client.SAdd(ctx, keys.SetKey, []string{id}) + if err != nil { + return err + } + + // Only add to stream if it's a new task + if added > 0 { + var fieldValues []models.FieldValue + fieldValues = append(fieldValues, models.FieldValue{Field: "id", Value: id}) + fieldValues = append(fieldValues, models.FieldValue{Field: "data", Value: string(ds)}) + + _, err = client.XAdd(ctx, keys.StreamKey, fieldValues) + if err != nil { + return err + } + } + + return nil +} + +func (q *taskQueue[T]) Dequeue(ctx context.Context, client glide.Client, queues []workflow.Queue, lockTimeout, timeout time.Duration) (*TaskItem[T], error) { + // Try to recover abandoned tasks + task, err := q.recover(ctx, client, queues, lockTimeout) + if err != nil { + return nil, fmt.Errorf("checking for abandoned tasks: %w", err) + } + + if task != nil { + return task, nil + } + + // Check for new tasks + var keyAndIds map[string]string + for _, queue := range queues { + keys := q.Keys(queue) + keyAndIds[keys.StreamKey] = keys.SetKey + } + + // Try to dequeue from all given queues + results, err := client.XReadGroupWithOptions(ctx, q.groupName, q.workerName, keyAndIds, options.XReadGroupOptions{ + Count: 1, + Block: timeout, + }) + + if err != nil { + return nil, fmt.Errorf("error dequeueing task: %w", err) + } + + if len(results) == 0 { + return nil, nil + } + + // Get the first entry to dequeue + var entry models.StreamEntry + for _, response := range results { + if len(response.Entries) > 0 { + entry = response.Entries[0] + break + } + } + + return msgToTaskItem[T](entry) +} + +func (q *taskQueue[T]) Extend(ctx context.Context, client glide.Client, queue workflow.Queue, taskID string) error { + // Claiming a message resets the idle timer + _, err := client.XClaim(ctx, q.Keys(queue).StreamKey, q.groupName, q.workerName, 0, []string{taskID}) + if err != nil { + return fmt.Errorf("extending lease: %w", err) + } + + return nil +} + +func (q *taskQueue[T]) Complete(ctx context.Context, client glide.Client, queue workflow.Queue, taskID string) error { + keys := q.Keys(queue) + + // Get the task to find the ID + msgs, err := client.XRange(ctx, keys.StreamKey, options.NewStreamBoundary(taskID, true), options.NewStreamBoundary(taskID, true)) + if err != nil { + return fmt.Errorf("completing task: %w", err) + } + + if len(msgs) == 0 { + return nil + } + + msg := msgs[0] + var id string + for _, field := range msg.Fields { + if field.Field == "id" { + id = field.Value + } + } + + // Remove from set + _, err = client.SRem(ctx, keys.SetKey, []string{id}) + if err != nil { + return fmt.Errorf("completing task: %w", err) + } + + // Acknowledge in consumer group + _, err = client.XAck(ctx, keys.StreamKey, q.groupName, []string{taskID}) + if err != nil { + return fmt.Errorf("completing task: %w", err) + } + + // Delete from stream + _, err = client.XDel(ctx, 
keys.StreamKey, []string{taskID}) + if err != nil { + return fmt.Errorf("completing task: %w", err) + } + + return nil +} + +func (q *taskQueue[T]) recover(ctx context.Context, client glide.Client, queues []workflow.Queue, idleTimeout time.Duration) (*TaskItem[T], error) { + for _, queue := range queues { + streamKey := q.Keys(queue).StreamKey + + // Try to recover abandoned tasks + msgs, err := client.XAutoClaimWithOptions(ctx, streamKey, q.groupName, q.workerName, idleTimeout, "0", options.XAutoClaimOptions{Count: 1}) + if err != nil { + return nil, fmt.Errorf("recovering abandoned task: %w", err) + } + + if len(msgs.ClaimedEntries) > 0 { + return msgToTaskItem[T](msgs.ClaimedEntries[0]) + } + } + + return nil, nil +} + +func msgToTaskItem[T any](msg models.StreamEntry) (*TaskItem[T], error) { + var id, data string + for _, field := range msg.Fields { + if field.Field == "id" { + id = field.Value + } else if field.Field == "data" { + data = field.Value + } + } + + var t T + if data != "" { + if err := json.Unmarshal([]byte(data), &t); err != nil { + return nil, err + } + } + + return &TaskItem[T]{ + TaskID: msg.ID, + ID: id, + Data: t, + }, nil +} diff --git a/backend/valkey/scripts/cancel_workflow_instance.lua b/backend/valkey/scripts/cancel_workflow_instance.lua new file mode 100644 index 00000000..d3237c96 --- /dev/null +++ b/backend/valkey/scripts/cancel_workflow_instance.lua @@ -0,0 +1,23 @@ +local payloadHashKey = KEYS[1] +local pendingEventsKey = KEYS[2] +local workflowSetKey = KEYS[3] +local workflowStreamKey = KEYS[4] + +local eventId = ARGV[1] +local eventData = ARGV[2] +local payload = ARGV[3] +local instanceSegment = ARGV[4] + +-- Add event payload +redis.pcall("HSETNX", payloadHashKey, eventId, payload) + +-- Add event to pending events stream +server.call("XADD", pendingEventsKey, "*", "event", eventData) + +-- Queue workflow task +local added = server.call("SADD", workflowSetKey, instanceSegment) +if added == 1 then + server.call("XADD", workflowStreamKey, "*", "id", instanceSegment, "data", "") +end + +return true diff --git a/backend/valkey/scripts/complete_activity_task.lua b/backend/valkey/scripts/complete_activity_task.lua new file mode 100644 index 00000000..762085f0 --- /dev/null +++ b/backend/valkey/scripts/complete_activity_task.lua @@ -0,0 +1,43 @@ +-- Complete an activity task, add the result event to the workflow instance, and enqueue the workflow task +-- KEYS[1] = activity set key +-- KEYS[2] = activity stream key +-- KEYS[3] = pending events stream key +-- KEYS[4] = payload hash key +-- KEYS[5] = workflow queues set key +-- KEYS[6] = workflow set key (for specific queue) +-- KEYS[7] = workflow stream key (for specific queue) +-- ARGV[1] = task id (activity) +-- ARGV[2] = group name (activity group) +-- ARGV[3] = event id +-- ARGV[4] = event data (json, without attributes) +-- ARGV[5] = payload data (json, can be empty) +-- ARGV[6] = workflow queue group name +-- ARGV[7] = workflow instance segment id + +-- Complete the activity task (from queue/complete.lua) +local task = server.call("XRANGE", KEYS[2], ARGV[1], ARGV[1]) +if #task == 0 then + return nil +end + +local id = task[1][2][2] +server.call("SREM", KEYS[1], id) +server.call("XACK", KEYS[2], "NOMKSTREAM", ARGV[2], ARGV[1]) +server.call("XDEL", KEYS[2], ARGV[1]) + +-- Add event to pending events stream for workflow instance +server.call("XADD", KEYS[3], "*", "event", ARGV[4]) + +-- Store payload if provided (only if not empty) +if ARGV[5] ~= "" then + redis.pcall("HSETNX", KEYS[4], ARGV[3], ARGV[5]) 
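+    -- HSETNX keeps the first value written for this event id, so a retried completion cannot overwrite it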
+end + +-- Enqueue workflow task (from queue/enqueue.lua) +server.call("SADD", KEYS[5], KEYS[6]) +local added = server.call("SADD", KEYS[6], ARGV[7]) +if added == 1 then + server.call("XADD", KEYS[7], "*", "id", ARGV[7], "data", "") +end + +return true diff --git a/backend/valkey/scripts/complete_workflow_task.lua b/backend/valkey/scripts/complete_workflow_task.lua new file mode 100644 index 00000000..d112bb17 --- /dev/null +++ b/backend/valkey/scripts/complete_workflow_task.lua @@ -0,0 +1,234 @@ +local keyIdx = 1 +local argvIdx = 1 + +local getKey = function() + local key = KEYS[keyIdx] + keyIdx = keyIdx + 1 + return key +end + +local getArgv = function() + local argv = ARGV[argvIdx] + argvIdx = argvIdx + 1 + -- server.call("ECHO", argv) + return argv +end + +-- Shared keys +local instanceKey = getKey() +local historyStreamKey = getKey() +local pendingEventsKey = getKey() +local payloadHashKey = getKey() +local futureEventZSetKey = getKey() +local activeInstancesKey = getKey() +local instancesByCreation = getKey() + +local workflowSetKey = getKey() +local workflowStreamKey = getKey() +local workflowQueuesSetKey = getKey() + +local prefix = getArgv() +local instanceSegment = getArgv() + +local storePayload = function(eventId, payload) + redis.pcall("HSETNX", payloadHashKey, eventId, payload) +end + +-- Read instance +local instance = cjson.decode(server.call("GET", instanceKey)) + +-- Add executed events to history +local executedEvents = tonumber(getArgv()) +local lastSequenceId = 0 +for i = 1, executedEvents do + local eventId = getArgv() + local eventData = getArgv() + local payloadData = getArgv() + local sequenceId = getArgv() + + -- Add event to history + server.call("XADD", historyStreamKey, sequenceId, "event", eventData) + + storePayload(eventId, payloadData) + + lastSequenceId = tonumber(sequenceId) +end + +-- Remove executed pending events +local lastPendingEventMessageId = getArgv() +server.call("XTRIM", pendingEventsKey, "MINID", lastPendingEventMessageId) +server.call("XDEL", pendingEventsKey, lastPendingEventMessageId) + +-- Update instance state +local now = getArgv() +local nowUnix = tonumber(getArgv()) +local state = tonumber(getArgv()) + +-- State constants +local ContinuedAsNew = tonumber(getArgv()) +local Finished = tonumber(getArgv()) + +instance["state"] = state + +-- If workflow instance finished, remove active execution +local activeInstanceExecutionKey = getKey() +if state == ContinuedAsNew or state == Finished then + -- Remove active execution + server.call("DEL", activeInstanceExecutionKey) + + instance["completed_at"] = now + + server.call("SREM", activeInstancesKey, instanceSegment) +end + +if lastSequenceId > 0 then + instance["last_sequence_id"] = lastSequenceId +end + +server.call("SET", instanceKey, cjson.encode(instance)) + +-- Remove canceled timers +local timersToCancel = tonumber(getArgv()) +for i = 1, timersToCancel do + local futureEventKey = getKey() + + local eventRemoved = server.call("ZREM", futureEventZSetKey, futureEventKey) + -- Event might've become visible while this task was being processed, in that + -- case it would be already removed from futureEventZSetKey + if eventRemoved == 1 then + -- remove payload + local eventId = server.call("HGET", futureEventKey, "id") + server.call("HDEL", payloadHashKey, eventId) + -- remove event hash + server.call("DEL", futureEventKey) + end +end + +-- Schedule timers +local timersToSchedule = tonumber(getArgv()) +for i = 1, timersToSchedule do + local eventId = getArgv() + local timestamp = getArgv() + 
local eventData = getArgv() + local payloadData = getArgv() + + local futureEventKey = getKey() + + server.call("ZADD", futureEventZSetKey, timestamp, futureEventKey) + server.call("HSET", futureEventKey, "instance", instanceSegment, "id", eventId, "event", eventData, "queue", instance["queue"]) + storePayload(eventId, payloadData) +end + +-- Schedule activities +local activities = tonumber(getArgv()) + +for i = 1, activities do + local activityQueue = getArgv() + local activityId = getArgv() + local activityData = getArgv() + + local activitySetKey = prefix .. "task-set:" .. activityQueue .. ":activities" + local activityStreamKey = prefix .. "task-stream:" .. activityQueue .. ":activities" + server.call("SADD", prefix .. "activities:queues", activitySetKey) + + local added = server.call("SADD", activitySetKey, activityId) + if added == 1 then + server.call("XADD", activityStreamKey, "*", "id", activityId, "data", activityData) + end +end + +-- Send events to other workflow instances +local otherWorkflowInstances = tonumber(getArgv()) +for i = 1, otherWorkflowInstances do + local targetInstanceKey = getKey() + local targetActiveInstanceExecutionKey = getKey() + + local targetInstanceSegment = getArgv() + local targetInstanceId = getArgv() + local createNewInstance = tonumber(getArgv()) + local eventsToDeliver = tonumber(getArgv()) + local skipEvents = false + + -- Creating a new instance? + if createNewInstance == 1 then + local targetInstanceState = getArgv() + local targetActiveInstanceExecutionState = getArgv() + + local conflictEventId = getArgv() + local conflictEventData = getArgv() + local conflictEventPayloadData = getArgv() + + -- Does the instance exist already? + local instanceExists = server.call("EXISTS", targetActiveInstanceExecutionKey) + if instanceExists == 1 then + server.call("XADD", pendingEventsKey, "*", "event", conflictEventData) + storePayload(conflictEventId, conflictEventPayloadData) + server.call("ECHO", + "Conflict detected, event " .. + conflictEventId .. " was not delivered to instance " .. targetInstanceSegment .. 
".") + + skipEvents = true + else + -- Create new instance + server.call("SETNX", targetInstanceKey, targetInstanceState) + + -- Set active execution + server.call("SET", targetActiveInstanceExecutionKey, targetActiveInstanceExecutionState) + + -- Track active instance + server.call("SADD", activeInstancesKey, targetInstanceSegment) + server.call("ZADD", instancesByCreation, nowUnix, targetInstanceSegment) + end + end + + local instanceQueueSetKey = getKey() + local instanceQueueStreamKey = getKey() + local instancePendingEventsKey = getKey() + local instancePayloadHashKey = getKey() + + for j = 1, eventsToDeliver do + local eventId = getArgv() + local eventData = getArgv() + local payloadData = getArgv() + + if not skipEvents then + -- Add event to pending events + server.call("XADD", instancePendingEventsKey, "*", "event", eventData) + + -- Store payload + redis.pcall("HSETNX", instancePayloadHashKey, eventId, payloadData) + end + end + + -- If events were delivered, try to queue a workflow task + if not skipEvents then + -- Enqueue workflow task + server.call("SADD", workflowQueuesSetKey, instanceQueueSetKey) + local added = server.call("SADD", instanceQueueSetKey, targetInstanceSegment) + if added == 1 then + server.call("XADD", instanceQueueStreamKey, "*", "id", targetInstanceSegment, "data", "") + end + end +end + +-- Complete workflow task and mark instance task as completed +local taskId = getArgv() +local groupName = getArgv() +local task = server.call("XRANGE", workflowStreamKey, taskId, taskId) +if #task ~= 0 then + local id = task[1][2][2] + server.call("SREM", workflowSetKey, id) + server.call("XACK", workflowStreamKey, groupName, taskId) + server.call("XDEL", workflowStreamKey, taskId) +end + +-- If there are pending events, queue the instance again +local pending_events = server.call("XLEN", pendingEventsKey) +if pending_events > 0 then + local added = server.call("SADD", workflowSetKey, instanceSegment) + if added == 1 then + server.call("XADD", workflowStreamKey, "*", "id", instanceSegment, "data", "") + end +end + +return true \ No newline at end of file diff --git a/backend/valkey/scripts/create_workflow_instance.lua b/backend/valkey/scripts/create_workflow_instance.lua new file mode 100644 index 00000000..92355341 --- /dev/null +++ b/backend/valkey/scripts/create_workflow_instance.lua @@ -0,0 +1,65 @@ +local keyIdx = 1 +local argvIdx = 1 + +local getKey = function() + local key = KEYS[keyIdx] + keyIdx = keyIdx + 1 + return key +end + +local getArgv = function() + local argv = ARGV[argvIdx] + argvIdx = argvIdx + 1 + return argv +end + +local instanceKey = getKey() +local activeInstanceExecutionKey = getKey() +local pendingEventsKey = getKey() +local payloadHashKey = getKey() + +local instancesActiveKey = getKey() +local instancesByCreation = getKey() + +local workflowSetKey = getKey() +local workflowStreamKey = getKey() +local workflowQueuesSet = getKey() + +local instanceSegment = getArgv() + +-- Is there an existing instance with active execution? 
+local instanceExists = server.call("EXISTS", activeInstanceExecutionKey) +if instanceExists == 1 then + return redis.error_reply("ERR InstanceAlreadyExists") +end + +-- Create new instance +local instanceState = getArgv() +server.call("SETNX", instanceKey, instanceState) + +-- Set active execution +local activeInstanceExecutionState = getArgv() +server.call("SET", activeInstanceExecutionKey, activeInstanceExecutionState) + +-- Track active instance +server.call("SADD", instancesActiveKey, instanceSegment) + +-- add initial event & payload +local eventId = getArgv() +local eventData = getArgv() +server.call("XADD", pendingEventsKey, "*", "event", eventData) + +local payload = getArgv() +redis.pcall("HSETNX", payloadHashKey, eventId, payload) + +local creationTimestamp = tonumber(getArgv()) +server.call("ZADD", instancesByCreation, creationTimestamp, instanceSegment) + +-- queue workflow task +server.call("SADD", workflowQueuesSet, workflowSetKey) -- track queue +local added = server.call("SADD", workflowSetKey, instanceSegment) +if added == 1 then + server.call("XADD", workflowStreamKey, "*", "id", instanceSegment, "data", "") +end + +return true \ No newline at end of file diff --git a/backend/valkey/scripts/delete_instance.lua b/backend/valkey/scripts/delete_instance.lua new file mode 100644 index 00000000..6538eaf5 --- /dev/null +++ b/backend/valkey/scripts/delete_instance.lua @@ -0,0 +1,14 @@ +local instanceKey = KEYS[1] +local pendingEventsKey = KEYS[2] +local historyKey = KEYS[3] +local payloadKey = KEYS[4] +local activeInstanceExecutionKey = KEYS[5] +local instancesByCreationKey = KEYS[6] + +local instanceSegment = ARGV[1] + +-- Delete all instance-related keys +server.call("DEL", instanceKey, pendingEventsKey, historyKey, payloadKey, activeInstanceExecutionKey) + +-- Remove instance from sorted set +return server.call("ZREM", instancesByCreationKey, instanceSegment) diff --git a/backend/valkey/scripts/expire_workflow_instance.lua b/backend/valkey/scripts/expire_workflow_instance.lua new file mode 100644 index 00000000..2335f5f6 --- /dev/null +++ b/backend/valkey/scripts/expire_workflow_instance.lua @@ -0,0 +1,29 @@ +-- Set the given expiration time on all keys passed in +-- KEYS[1] - instances-by-creation key +-- KEYS[2] - instances-expiring key +-- KEYS[3] - instance key +-- KEYS[4] - pending events key +-- KEYS[5] - history key +-- KEYS[6] - payload key +-- ARGV[1] - current timestamp +-- ARGV[2] - expiration time in seconds +-- ARGV[3] - expiration timestamp in unix milliseconds +-- ARGV[4] - instance segment + +-- Find instances which have already expired and remove from the index set +local expiredInstances = server.call("ZRANGE", KEYS[2], "-inf", ARGV[1], "BYSCORE") +for i = 1, #expiredInstances do + local instanceSegment = expiredInstances[i] + server.call("ZREM", KEYS[1], instanceSegment) -- index set + server.call("ZREM", KEYS[2], instanceSegment) -- expiration set +end + +-- Add expiration time for future cleanup +server.call("ZADD", KEYS[2], ARGV[3], ARGV[4]) + +-- Set expiration on all keys +for i = 3, #KEYS do + server.call("EXPIRE", KEYS[i], ARGV[2]) +end + +return 0 diff --git a/backend/valkey/scripts/queue/complete.lua b/backend/valkey/scripts/queue/complete.lua new file mode 100644 index 00000000..80e50187 --- /dev/null +++ b/backend/valkey/scripts/queue/complete.lua @@ -0,0 +1,21 @@ +-- We need TaskIDs for the stream and caller provided IDs for the set. 
So first look up +-- the ID in the stream using the TaskID, then remove from the set and the stream +-- KEYS[1] = set +-- KEYS[2] = stream +-- ARGV[1] = task id +-- ARGV[2] = group +-- We have to XACK _and_ XDEL here. See https://github.com/redis/redis/issues/5754 +local task = server.call("XRANGE", KEYS[2], ARGV[1], ARGV[1]) +if #task == 0 then + return nil +end + +local id = task[1][2][2] +server.call("SREM", KEYS[1], id) +server.call("XACK", KEYS[2], "NOMKSTREAM", ARGV[2], ARGV[1]) + +-- Delete the task here. Overall we'll keep the stream at a small size, so fragmentation +-- is not an issue for us. +server.call("XDEL", KEYS[2], ARGV[1]) + +return true \ No newline at end of file diff --git a/backend/valkey/scripts/queue/enqueue.lua b/backend/valkey/scripts/queue/enqueue.lua new file mode 100644 index 00000000..1cc83b14 --- /dev/null +++ b/backend/valkey/scripts/queue/enqueue.lua @@ -0,0 +1,13 @@ +-- KEYS[1] = queues set +-- KEYS[2] = set +-- KEYS[3] = stream +-- ARGV[1] = consumer group +-- ARGV[2] = caller provided id of the task +-- ARGV[3] = additional data to store with the task +server.call("SADD", KEYS[1], KEYS[2]) +local added = server.call("SADD", KEYS[2], ARGV[2]) +if added == 1 then + server.call("XADD", KEYS[3], "*", "id", ARGV[2], "data", ARGV[3]) +end + +return true \ No newline at end of file diff --git a/backend/valkey/scripts/queue/prepare.lua b/backend/valkey/scripts/queue/prepare.lua new file mode 100644 index 00000000..a2e05b7a --- /dev/null +++ b/backend/valkey/scripts/queue/prepare.lua @@ -0,0 +1,30 @@ +-- KEYS[1..n] - queue stream keys +-- ARGV[1] - group name + +for i = 1, #KEYS do + local streamKey = KEYS[i] + local groupName = ARGV[1] + local exists = false + local res = redis.pcall('XINFO', 'GROUPS', streamKey) + + if res and type(res) == 'table' then + for _, groupInfo in ipairs(res) do + if type(groupInfo) == 'table' then + for i = 1, #groupInfo, 2 do + if groupInfo[i] == 'name' and groupInfo[i + 1] == groupName then + exists = true + break + end + end + end + + if exists then + break + end + end + end + + if not exists then + server.call('XGROUP', 'CREATE', streamKey, groupName, '0', 'MKSTREAM') + end +end \ No newline at end of file diff --git a/backend/valkey/scripts/queue/recover.lua b/backend/valkey/scripts/queue/recover.lua new file mode 100644 index 00000000..4572896d --- /dev/null +++ b/backend/valkey/scripts/queue/recover.lua @@ -0,0 +1,16 @@ +-- KEYS[1..n] = queue stream keys +-- ARGV[1] = group name +-- ARGV[2] = consumer/worker name +-- ARGV[3] = min-idle time in ms +-- ARGV[4] = start + +-- Try to recover abandoned tasks +for i = 1, #KEYS do + local stream = KEYS[i] + local recovered = server.call("XAUTOCLAIM", stream, ARGV[1], ARGV[2], ARGV[3], ARGV[4], "COUNT", 1) + if #recovered > 0 then + if #recovered[1] > 0 then + return recovered + end + end +end \ No newline at end of file diff --git a/backend/valkey/scripts/queue/size.lua b/backend/valkey/scripts/queue/size.lua new file mode 100644 index 00000000..09fa3230 --- /dev/null +++ b/backend/valkey/scripts/queue/size.lua @@ -0,0 +1,13 @@ +-- Return a table with the queue name as key and the number of tasks in the queue as value +-- KEYS[1] = stream set key +local res = {} +local r = server.call("SMEMBERS", KEYS[1]) +local idx = 1 +for i = 1, #r, 1 do + local queue = r[i] + local length = server.call("SCARD", queue) + table.insert(res, queue) + table.insert(res, length) +end + +return res diff --git a/backend/valkey/scripts/schedule_future_events.lua 
b/backend/valkey/scripts/schedule_future_events.lua new file mode 100644 index 00000000..befdcf32 --- /dev/null +++ b/backend/valkey/scripts/schedule_future_events.lua @@ -0,0 +1,39 @@ +-- Find all due future events. For each event: +-- - Look up event data +-- - Add to pending event stream for workflow instance +-- - Try to queue workflow task for workflow instance +-- - Remove event from future event set and delete event data +-- +-- KEYS[1] - future event set key +-- ARGV[1] - current timestamp for zrange +-- ARGV[2] - redis key prefix +-- +-- Note: this does not work with Redis Cluster since not all keys are passed into the script. +-- Find events which should become visible now +local now = ARGV[1] +local events = server.call("ZRANGE", KEYS[1], "-inf", now, "BYSCORE") +local prefix = ARGV[2] +for i = 1, #events do + local instanceSegment = server.call("HGET", events[i], "instance") + local queue = server.call("HGET", events[i], "queue") + + local setKey = prefix .. "task-set:" .. queue .. ":workflows" + local streamKey = prefix .. "task-stream:" .. queue .. ":workflows" + + -- Try to queue workflow task. If a workflow task is already queued, ignore this event for now. + local added = server.call("SADD", setKey, instanceSegment) + if added == 1 then + server.call("XADD", streamKey, "*", "id", instanceSegment, "data", "") + + -- Add event to pending event stream + local eventData = server.call("HGET", events[i], "event") + local pending_events_key = prefix .. "pending-events:" .. instanceSegment + server.call("XADD", pending_events_key, "*", "event", eventData) + + -- Delete event hash data + server.call("DEL", events[i]) + server.call("ZREM", KEYS[1], events[i]) + end +end + +return #events diff --git a/backend/valkey/scripts/signal_workflow.lua b/backend/valkey/scripts/signal_workflow.lua new file mode 100644 index 00000000..ce05e822 --- /dev/null +++ b/backend/valkey/scripts/signal_workflow.lua @@ -0,0 +1,35 @@ +-- Signal a workflow instance by adding an event to its pending events stream and queuing it +-- +-- KEYS[1] - payload hash key +-- KEYS[2] - pending events stream key +-- KEYS[3] - workflow task set key +-- KEYS[4] - workflow task stream key +-- +-- ARGV[1] - event id +-- ARGV[2] - event data (JSON) +-- ARGV[3] - event payload (JSON) +-- ARGV[4] - instance segment + +local payloadHashKey = KEYS[1] +local pendingEventsKey = KEYS[2] +local workflowSetKey = KEYS[3] +local workflowStreamKey = KEYS[4] + +local eventId = ARGV[1] +local eventData = ARGV[2] +local payload = ARGV[3] +local instanceSegment = ARGV[4] + +-- Add event payload +redis.pcall("HSETNX", payloadHashKey, eventId, payload) + +-- Add event to pending events stream +server.call("XADD", pendingEventsKey, "*", "event", eventData) + +-- Queue workflow task +local added = server.call("SADD", workflowSetKey, instanceSegment) +if added == 1 then + server.call("XADD", workflowStreamKey, "*", "id", instanceSegment, "data", "") +end + +return true diff --git a/backend/valkey/signal.go b/backend/valkey/signal.go new file mode 100644 index 00000000..aae1f230 --- /dev/null +++ b/backend/valkey/signal.go @@ -0,0 +1,61 @@ +package valkey + +import ( + "context" + "fmt" + + "github.com/cschleiden/go-workflows/backend" + "github.com/cschleiden/go-workflows/backend/history" + "github.com/cschleiden/go-workflows/workflow" + "github.com/valkey-io/valkey-glide/go/v2/options" +) + +func (vb *valkeyBackend) SignalWorkflow(ctx context.Context, instanceID string, event *history.Event) error { + // Get current execution of the instance + 
instance, err := vb.readActiveInstanceExecution(ctx, instanceID) + if err != nil { + return fmt.Errorf("reading active instance execution: %w", err) + } + + if instance == nil { + return backend.ErrInstanceNotFound + } + + instanceState, err := readInstance(ctx, vb.client, vb.keys.instanceKey(instance)) + if err != nil { + return err + } + + eventData, payload, err := marshalEvent(event) + if err != nil { + return fmt.Errorf("marshaling event: %w", err) + } + + queue := workflow.Queue(instanceState.Queue) + queueKeys := vb.workflowQueue.Keys(queue) + + keys := []string{ + vb.keys.payloadKey(instanceState.Instance), + vb.keys.pendingEventsKey(instanceState.Instance), + queueKeys.SetKey, + queueKeys.StreamKey, + } + + args := []string{ + event.ID, + eventData, + payload, + instanceSegment(instanceState.Instance), + } + + // Execute the Lua script + _, err = vb.client.InvokeScriptWithOptions(ctx, signalWorkflowScript, options.ScriptOptions{ + Keys: keys, + Args: args, + }) + if err != nil { + return fmt.Errorf("signaling workflow: %w", err) + } + + return nil +} diff --git a/backend/valkey/stats.go b/backend/valkey/stats.go new file mode 100644 index 00000000..9246da40 --- /dev/null +++ b/backend/valkey/stats.go @@ -0,0 +1,38 @@ +package valkey + +import ( + "context" + "fmt" + + "github.com/cschleiden/go-workflows/backend" +) + +func (vb *valkeyBackend) GetStats(ctx context.Context) (*backend.Stats, error) { + s := &backend.Stats{} + + // get workflow instances + activeInstances, err := vb.client.SCard(ctx, vb.keys.instancesActive()) + if err != nil { + return nil, fmt.Errorf("getting active instances: %w", err) + } + + s.ActiveWorkflowInstances = activeInstances + + // get pending workflow tasks + pendingWorkflows, err := vb.workflowQueue.Size(ctx, vb.client) + if err != nil { + return nil, fmt.Errorf("getting active workflows: %w", err) + } + + s.PendingWorkflowTasks = pendingWorkflows + + // get pending activities + pendingActivities, err := vb.activityQueue.Size(ctx, vb.client) + if err != nil { + return nil, fmt.Errorf("getting active activities: %w", err) + } + + s.PendingActivityTasks = pendingActivities + + return s, nil +} diff --git a/backend/valkey/valkey.go b/backend/valkey/valkey.go new file mode 100644 index 00000000..a25b4b24 --- /dev/null +++ b/backend/valkey/valkey.go @@ -0,0 +1,140 @@ +package valkey + +import ( + "context" + "embed" + "fmt" + "io/fs" + "time" + + "github.com/cschleiden/go-workflows/backend" + "github.com/cschleiden/go-workflows/backend/history" + "github.com/cschleiden/go-workflows/backend/metrics" + "github.com/cschleiden/go-workflows/core" + "github.com/cschleiden/go-workflows/internal/metrickeys" + "github.com/valkey-io/valkey-glide/go/v2" + "github.com/valkey-io/valkey-glide/go/v2/options" + "go.opentelemetry.io/otel/trace" +) + +var _ backend.Backend = (*valkeyBackend)(nil) + +//go:embed scripts +var luaScripts embed.FS + +var ( + createWorkflowInstanceScript options.Script + completeWorkflowTaskScript options.Script + completeActivityTaskScript options.Script + deleteInstanceScript options.Script + futureEventsScript options.Script + expireWorkflowInstanceScript options.Script + cancelWorkflowInstanceScript options.Script + signalWorkflowScript options.Script +) + +func NewValkeyBackend(client glide.Client, opts ...ValkeyBackendOption) (backend.Backend, error) { + // Default options + vopts := &ValkeyOptions{ + Options: backend.ApplyOptions(), + BlockTimeout: time.Second * 2, + } + + for _, opt := range opts { + opt(vopts) + } + + ctx := 
context.Background() + + workflowQueue, err := newTaskQueue[workflowData](ctx, client, vopts.KeyPrefix, "workflows", vopts.WorkerName) + if err != nil { + return nil, fmt.Errorf("creating workflow task queue: %w", err) + } + + activityQueue, err := newTaskQueue[activityData](ctx, client, vopts.KeyPrefix, "activities", vopts.WorkerName) + if err != nil { + return nil, fmt.Errorf("creating activity task queue: %w", err) + } + + vb := &valkeyBackend{ + client: client, + options: vopts, + keys: newKeys(vopts.KeyPrefix), + workflowQueue: workflowQueue, + activityQueue: activityQueue, + } + + // Load all Lua scripts + scriptMapping := map[string]*options.Script{ + "cancel_workflow_instance.lua": &cancelWorkflowInstanceScript, + "complete_activity_task.lua": &completeActivityTaskScript, + "complete_workflow_task.lua": &completeWorkflowTaskScript, + "create_workflow_instance.lua": &createWorkflowInstanceScript, + "delete_instance.lua": &deleteInstanceScript, + "expire_workflow_instance.lua": &expireWorkflowInstanceScript, + "schedule_future_events.lua": &futureEventsScript, + "signal_workflow.lua": &signalWorkflowScript, + } + + if err := loadScripts(scriptMapping); err != nil { + return nil, fmt.Errorf("loading Lua scripts: %w", err) + } + + return vb, nil +} + +func loadScripts(scriptMapping map[string]*options.Script) error { + for scriptFile, scriptVar := range scriptMapping { + scriptContent, err := fs.ReadFile(luaScripts, "scripts/"+scriptFile) + if err != nil { + return fmt.Errorf("reading Lua script %s: %w", scriptFile, err) + } + + *scriptVar = *options.NewScript(string(scriptContent)) + } + + return nil +} + +type valkeyBackend struct { + client glide.Client + options *ValkeyOptions + keys *keys + workflowQueue *taskQueue[workflowData] + activityQueue *taskQueue[activityData] +} + +type workflowData struct{} + +type activityData struct { + Instance *core.WorkflowInstance `json:"instance,omitempty"` + Queue string `json:"queue,omitempty"` + ID string `json:"id,omitempty"` + Event *history.Event `json:"event,omitempty"` +} + +func (vb *valkeyBackend) Metrics() metrics.Client { + return vb.options.Metrics.WithTags(metrics.Tags{metrickeys.Backend: "valkey"}) +} + +func (vb *valkeyBackend) Tracer() trace.Tracer { + return vb.options.TracerProvider.Tracer(backend.TracerName) +} + +func (vb *valkeyBackend) Options() *backend.Options { + return vb.options.Options +} + +func (vb *valkeyBackend) Close() error { + vb.client.Close() + return nil +} + +func (vb *valkeyBackend) FeatureSupported(feature backend.Feature) bool { + switch feature { + case backend.Feature_Expiration: + return false + } + + return true +} diff --git a/backend/valkey/workflow.go b/backend/valkey/workflow.go new file mode 100644 index 00000000..5293482f --- /dev/null +++ b/backend/valkey/workflow.go @@ -0,0 +1,335 @@ +package valkey + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "time" + + "github.com/cschleiden/go-workflows/backend" + "github.com/cschleiden/go-workflows/backend/history" + "github.com/cschleiden/go-workflows/core" + "github.com/cschleiden/go-workflows/internal/log" + "github.com/cschleiden/go-workflows/internal/propagators" + "github.com/cschleiden/go-workflows/internal/workflowerrors" + "github.com/cschleiden/go-workflows/workflow" + "github.com/valkey-io/valkey-glide/go/v2/options" +) + +func (vb *valkeyBackend) PrepareWorkflowQueues(ctx context.Context, queues []workflow.Queue) error { + return vb.workflowQueue.Prepare(ctx, vb.client, queues) +} + +func (vb *valkeyBackend) 
GetWorkflowTask(ctx context.Context, queues []workflow.Queue) (*backend.WorkflowTask, error) { + if err := scheduleFutureEvents(ctx, vb); err != nil { + return nil, fmt.Errorf("scheduling future events: %w", err) + } + + // Try to get a workflow task, this locks the instance when it dequeues one + instanceTask, err := vb.workflowQueue.Dequeue(ctx, vb.client, queues, vb.options.WorkflowLockTimeout, vb.options.BlockTimeout) + if err != nil { + return nil, err + } + + if instanceTask == nil { + return nil, nil + } + + instanceState, err := readInstance(ctx, vb.client, vb.keys.instanceKeyFromSegment(instanceTask.ID)) + if err != nil { + return nil, fmt.Errorf("reading workflow instance: %w", err) + } + + // Read all pending events for this instance + msgs, err := vb.client.XRange(ctx, vb.keys.pendingEventsKey(instanceState.Instance), "-", "+") + if err != nil { + return nil, fmt.Errorf("reading event stream: %w", err) + } + + payloadKeys := make([]string, 0, len(msgs)) + newEvents := make([]*history.Event, 0, len(msgs)) + for _, msg := range msgs { + var eventStr string + for _, field := range msg.Fields { + if field.Field == "event" { + eventStr = field.Value + break + } + } + + var event *history.Event + if err := json.Unmarshal([]byte(eventStr), &event); err != nil { + return nil, fmt.Errorf("unmarshaling event: %w", err) + } + + payloadKeys = append(payloadKeys, event.ID) + newEvents = append(newEvents, event) + } + + // Fetch event payloads + if len(payloadKeys) > 0 { + res, err := vb.client.HMGet(ctx, vb.keys.payloadKey(instanceState.Instance), payloadKeys) + if err != nil { + return nil, fmt.Errorf("reading payloads: %w", err) + } + + for i, event := range newEvents { + event.Attributes, err = history.DeserializeAttributes(event.Type, []byte(res[i].Value())) + if err != nil { + return nil, fmt.Errorf("deserializing attributes for event %v: %w", event.Type, err) + } + } + } + + return &backend.WorkflowTask{ + ID: instanceTask.TaskID, + Queue: core.Queue(instanceState.Queue), + WorkflowInstance: instanceState.Instance, + WorkflowInstanceState: instanceState.State, + Metadata: instanceState.Metadata, + LastSequenceID: instanceState.LastSequenceID, + NewEvents: newEvents, + CustomData: msgs[len(msgs)-1].ID, + }, nil +} + +func (vb *valkeyBackend) ExtendWorkflowTask(ctx context.Context, task *backend.WorkflowTask) error { + return vb.workflowQueue.Extend(ctx, vb.client, task.Queue, task.ID) +} + +func (vb *valkeyBackend) CompleteWorkflowTask( + ctx context.Context, + task *backend.WorkflowTask, + state core.WorkflowInstanceState, + executedEvents, activityEvents, timerEvents []*history.Event, + workflowEvents []*history.WorkflowEvent, +) error { + keys := make([]string, 0) + args := make([]string, 0) + + instance := task.WorkflowInstance + + queueKeys := vb.workflowQueue.Keys(task.Queue) + keys = append(keys, + vb.keys.instanceKey(instance), + vb.keys.historyKey(instance), + vb.keys.pendingEventsKey(instance), + vb.keys.payloadKey(instance), + vb.keys.futureEventsKey(), + vb.keys.instancesActive(), + vb.keys.instancesByCreation(), + queueKeys.SetKey, + queueKeys.StreamKey, + vb.workflowQueue.queueSetKey, + ) + args = append(args, vb.keys.prefix, instanceSegment(instance)) + + // Add executed events to the history + args = append(args, fmt.Sprintf("%d", len(executedEvents))) + + for _, event := range executedEvents { + eventData, payloadData, err := marshalEvent(event) + if err != nil { + return err + } + + args = append(args, event.ID, eventData, payloadData, fmt.Sprintf("%d", 
event.SequenceID)) + } + + // Remove executed pending events + lastPendingEventMessageID := task.CustomData.(string) + args = append(args, lastPendingEventMessageID) + + // Update instance state and update active execution + now := time.Now().UTC() + nowStr := now.Format(time.RFC3339) + nowUnix := now.Unix() + args = append( + args, + nowStr, + fmt.Sprintf("%d", nowUnix), + fmt.Sprintf("%d", int(state)), + fmt.Sprintf("%d", int(core.WorkflowInstanceStateContinuedAsNew)), + fmt.Sprintf("%d", int(core.WorkflowInstanceStateFinished)), + ) + keys = append(keys, vb.keys.activeInstanceExecutionKey(instance.InstanceID)) + + // Remove canceled timers + timersToCancel := make([]*history.Event, 0) + for _, event := range executedEvents { + switch event.Type { + case history.EventType_TimerCanceled: + timersToCancel = append(timersToCancel, event) + default: + return fmt.Errorf("unexpected event type %v", event.Type) + } + } + + args = append(args, fmt.Sprintf("%d", len(timersToCancel))) + for _, event := range timersToCancel { + keys = append(keys, vb.keys.futureEventKey(instance, event.ScheduleEventID)) + } + + // Schedule timers + args = append(args, fmt.Sprintf("%d", len(timerEvents))) + for _, timerEvent := range timerEvents { + eventData, payloadEventData, err := marshalEvent(timerEvent) + if err != nil { + return err + } + + args = append(args, timerEvent.ID, strconv.FormatInt(timerEvent.VisibleAt.UnixMilli(), 10), eventData, payloadEventData) + keys = append(keys, vb.keys.futureEventKey(instance, timerEvent.ScheduleEventID)) + } + + // Schedule activities + args = append(args, fmt.Sprintf("%d", len(activityEvents))) + for _, activityEvent := range activityEvents { + a := activityEvent.Attributes.(*history.ActivityScheduledAttributes) + queue := a.Queue + if queue == "" { + // Default to workflow queue + queue = task.Queue + } + + activityData, err := json.Marshal(&activityData{ + Instance: instance, + ID: activityEvent.ID, + Event: activityEvent, + Queue: string(queue), + }) + if err != nil { + return fmt.Errorf("marshaling activity data: %w", err) + } + + activityQueue := string(queue) + args = append(args, activityQueue, activityEvent.ID, string(activityData)) + } + + // Send new workflow events to the respective streams + groupedEvents := history.EventsByWorkflowInstance(workflowEvents) + args = append(args, fmt.Sprintf("%d", len(groupedEvents))) + for targetInstance, events := range groupedEvents { + keys = append(keys, vb.keys.instanceKey(&targetInstance), vb.keys.activeInstanceExecutionKey(targetInstance.InstanceID)) + args = append(args, instanceSegment(&targetInstance), targetInstance.InstanceID) + + // Are we creating a new workflow instance? 
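+		// If the first event for the target instance is a WorkflowExecutionStarted event, this batch
+		// starts a sub-workflow: the serialized instance state is passed along so the script can create
+		// the instance atomically, together with a prepared SubWorkflowFailed event for the parent in
+		// case an instance with that ID already exists. Otherwise the events are delivered to the
+		// pending events of the existing target instance.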
+ m := events[0] + createNewInstance := m.HistoryEvent.Type == history.EventType_WorkflowExecutionStarted + args = append(args, fmt.Sprintf("%v", createNewInstance)) + args = append(args, fmt.Sprintf("%d", len(events))) + + if createNewInstance { + a := m.HistoryEvent.Attributes.(*history.ExecutionStartedAttributes) + + queue := a.Queue + if queue == "" { + queue = task.Queue + } + + isb, err := json.Marshal(&instanceState{ + Queue: string(queue), + Instance: &targetInstance, + State: core.WorkflowInstanceStateActive, + Metadata: a.Metadata, + CreatedAt: time.Now(), + }) + if err != nil { + return fmt.Errorf("marshaling new instance state: %w", err) + } + + ib, err := json.Marshal(targetInstance) + if err != nil { + return fmt.Errorf("marshaling instance: %w", err) + } + + args = append(args, string(isb), string(ib)) + + // Create pending event for conflicts + pfe := history.NewPendingEvent(time.Now(), history.EventType_SubWorkflowFailed, &history.SubWorkflowFailedAttributes{ + Error: workflowerrors.FromError(backend.ErrInstanceAlreadyExists), + }, history.ScheduleEventID(m.WorkflowInstance.ParentEventID)) + eventData, payloadEventData, err := marshalEvent(pfe) + if err != nil { + return fmt.Errorf("marshaling event: %w", err) + } + + args = append(args, pfe.ID, eventData, payloadEventData) + + queueKeys := vb.workflowQueue.Keys(queue) + keys = append(keys, queueKeys.SetKey, queueKeys.StreamKey) + } else { + targetInstanceState, err := readInstance(ctx, vb.client, vb.keys.instanceKey(&targetInstance)) + if err != nil { + return fmt.Errorf("reading target instance: %w", err) + } + + queueKeys := vb.workflowQueue.Keys(core.Queue(targetInstanceState.Queue)) + keys = append(keys, queueKeys.SetKey, queueKeys.StreamKey) + } + + keys = append(keys, vb.keys.pendingEventsKey(&targetInstance), vb.keys.payloadKey(&targetInstance)) + for _, m := range events { + eventData, payloadEventData, err := marshalEvent(m.HistoryEvent) + if err != nil { + return fmt.Errorf("marshaling event: %w", err) + } + + args = append(args, m.HistoryEvent.ID, eventData, payloadEventData) + } + } + + // Complete workflow task and unlock instance. 
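+	// task.ID identifies the stream entry to acknowledge in the consumer group; the group name is
+	// passed so the script can release the instance lock and allow it to be queued again.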
+ args = append(args, task.ID, vb.workflowQueue.groupName) + + // Run script + _, err := vb.client.InvokeScriptWithOptions(ctx, completeWorkflowTaskScript, options.ScriptOptions{ + Keys: keys, + Args: args, + }) + if err != nil { + return fmt.Errorf("completing workflow task: %w", err) + } + + if state == core.WorkflowInstanceStateFinished || state == core.WorkflowInstanceStateContinuedAsNew { + // Trace workflow completion + ctx, err := (&propagators.TracingContextPropagator{}).Extract(ctx, task.Metadata) + if err != nil { + vb.options.Logger.Error("extracting tracing context", log.ErrorKey, err) + } + + // Auto expiration + expiration := vb.options.AutoExpiration + if state == core.WorkflowInstanceStateContinuedAsNew && vb.options.AutoExpirationContinueAsNew > 0 { + expiration = vb.options.AutoExpirationContinueAsNew + } + + if expiration > 0 { + if err := vb.setWorkflowInstanceExpiration(ctx, instance, expiration); err != nil { + return fmt.Errorf("setting workflow instance expiration: %w", err) + } + } + + if vb.options.RemoveContinuedAsNewInstances && state == core.WorkflowInstanceStateContinuedAsNew { + if err := vb.RemoveWorkflowInstance(ctx, instance); err != nil { + return fmt.Errorf("removing workflow instance: %w", err) + } + } + } + + return nil +} + +func marshalEvent(event *history.Event) (string, string, error) { + eventData, err := marshalEventWithoutAttributes(event) + if err != nil { + return "", "", fmt.Errorf("marshaling event payload: %w", err) + } + + payloadEventData, err := json.Marshal(event.Attributes) + if err != nil { + return "", "", fmt.Errorf("marshaling event payload: %w", err) + } + return eventData, string(payloadEventData), nil +} diff --git a/go.mod b/go.mod index 3ee7dc00..e541c2be 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/jellydator/ttlcache/v3 v3.0.0 github.com/redis/go-redis/v9 v9.0.2 github.com/stretchr/testify v1.10.0 + github.com/valkey-io/valkey-glide/go/v2 v2.1.1 go.opentelemetry.io/otel v1.31.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 @@ -24,6 +25,7 @@ require ( require ( github.com/dustin/go-humanize v1.0.1 // indirect + github.com/google/go-cmp v0.7.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect diff --git a/go.sum b/go.sum index a0e1c03b..04201178 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69 github.com/golang-migrate/migrate/v4 v4.16.2 h1:8coYbMKUyInrFk1lfGfRovTLAW7PhWp8qQDT2iKfuoA= github.com/golang-migrate/migrate/v4 v4.16.2/go.mod h1:pfcJX4nPHaVdc5nmdCikFBWtm+UBpiZjRNNsyBbp0/o= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= github.com/google/uuid v1.6.0 
h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -107,6 +107,8 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/valkey-io/valkey-glide/go/v2 v2.1.1 h1:78eoWXIYLbse0ZpspKRMwbREj0+Tkoc/qkSR8H9iRsc= +github.com/valkey-io/valkey-glide/go/v2 v2.1.1/go.mod h1:LK5zmODJa5xnxZndarh1trntExb3GVGJXz4GwDCagho= go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 h1:K0XaT3DwHAcV4nKLzcQvwAgSyisUghWoY20I7huthMk= From 3cdb78e4edbd2415654b0c900323619969ebc84d Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:15 +0100 Subject: [PATCH 02/23] add queue tests for valkey integration --- backend/valkey/activity.go | 4 +- backend/valkey/instance.go | 2 +- backend/valkey/keys_test.go | 24 ++++ backend/valkey/options.go | 24 ++-- backend/valkey/queue.go | 36 +++--- backend/valkey/queue_test.go | 243 +++++++++++++++++++++++++++++++++++ backend/valkey/valkey.go | 13 +- 7 files changed, 303 insertions(+), 43 deletions(-) create mode 100644 backend/valkey/keys_test.go create mode 100644 backend/valkey/queue_test.go diff --git a/backend/valkey/activity.go b/backend/valkey/activity.go index 733a5123..6ac726f5 100644 --- a/backend/valkey/activity.go +++ b/backend/valkey/activity.go @@ -42,7 +42,7 @@ func (vb *valkeyBackend) ExtendActivityTask(ctx context.Context, task *backend.A } func (vb *valkeyBackend) CompleteActivityTask(ctx context.Context, task *backend.ActivityTask, result *history.Event) error { - instanceState, err := readInstance(ctx, vb.client, vb.keys.instanceKey(task.WorkflowInstance)) + instance, err := readInstance(ctx, vb.client, vb.keys.instanceKey(task.WorkflowInstance)) if err != nil { return err } @@ -54,7 +54,7 @@ func (vb *valkeyBackend) CompleteActivityTask(ctx context.Context, task *backend } activityQueueKeys := vb.activityQueue.Keys(task.Queue) - workflowQueueKeys := vb.workflowQueue.Keys(workflow.Queue(instanceState.Queue)) + workflowQueueKeys := vb.workflowQueue.Keys(workflow.Queue(instance.Queue)) _, err = vb.client.InvokeScriptWithOptions(ctx, completeActivityTaskScript, options.ScriptOptions{ Keys: []string{ diff --git a/backend/valkey/instance.go b/backend/valkey/instance.go index e816e737..dbcd37d6 100644 --- a/backend/valkey/instance.go +++ b/backend/valkey/instance.go @@ -192,7 +192,7 @@ func (vb *valkeyBackend) RemoveWorkflowInstance(ctx context.Context, instance *c return vb.deleteInstance(ctx, instance) } -func (vb *valkeyBackend) RemoveWorkflowInstances(ctx context.Context, options ...backend.RemovalOption) error { +func (vb *valkeyBackend) RemoveWorkflowInstances(_ context.Context, _ ...backend.RemovalOption) error { return backend.ErrNotSupported{ Message: "not supported, use auto-expiration", } diff --git a/backend/valkey/keys_test.go b/backend/valkey/keys_test.go new file mode 100644 index 00000000..867c045d --- /dev/null +++ b/backend/valkey/keys_test.go @@ -0,0 +1,24 @@ +package valkey + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_newKeys(t *testing.T) { + t.Run("WithEmptyPrefix", func(t *testing.T) { + k := newKeys("") + require.Empty(t, 
k.prefix) + }) + + t.Run("WithNonEmptyPrefixWithoutColon", func(t *testing.T) { + k := newKeys("prefix") + require.Equal(t, "prefix:", k.prefix) + }) + + t.Run("WithNonEmptyPrefixWithColon", func(t *testing.T) { + k := newKeys("prefix:") + require.Equal(t, "prefix:", k.prefix) + }) +} diff --git a/backend/valkey/options.go b/backend/valkey/options.go index b3a914c5..37404983 100644 --- a/backend/valkey/options.go +++ b/backend/valkey/options.go @@ -6,7 +6,7 @@ import ( "github.com/cschleiden/go-workflows/backend" ) -type ValkeyOptions struct { +type Options struct { *backend.Options BlockTimeout time.Duration @@ -17,26 +17,26 @@ type ValkeyOptions struct { KeyPrefix string } -type ValkeyBackendOption func(*ValkeyOptions) +type BackendOption func(*Options) // WithKeyPrefix sets the prefix for all keys used in the Valkey backend. -func WithKeyPrefix(prefix string) ValkeyBackendOption { - return func(o *ValkeyOptions) { +func WithKeyPrefix(prefix string) BackendOption { + return func(o *Options) { o.KeyPrefix = prefix } } // WithBlockTimeout sets the timeout for blocking operations like dequeuing a workflow or activity task -func WithBlockTimeout(timeout time.Duration) ValkeyBackendOption { - return func(o *ValkeyOptions) { +func WithBlockTimeout(timeout time.Duration) BackendOption { + return func(o *Options) { o.BlockTimeout = timeout } } // WithAutoExpiration sets the duration after which finished runs will expire from the data store. // If set to 0 (default), runs will never expire and need to be manually removed. -func WithAutoExpiration(expireFinishedRunsAfter time.Duration) ValkeyBackendOption { - return func(o *ValkeyOptions) { +func WithAutoExpiration(expireFinishedRunsAfter time.Duration) BackendOption { + return func(o *Options) { o.AutoExpiration = expireFinishedRunsAfter } } @@ -44,14 +44,14 @@ func WithAutoExpiration(expireFinishedRunsAfter time.Duration) ValkeyBackendOpti // WithAutoExpirationContinueAsNew sets the duration after which runs that were completed with `ContinueAsNew` // automatically expire. // If set to 0 (default), the overall expiration setting set with `WithAutoExpiration` will be used. 
-func WithAutoExpirationContinueAsNew(expireContinuedAsNewRunsAfter time.Duration) ValkeyBackendOption { - return func(o *ValkeyOptions) { +func WithAutoExpirationContinueAsNew(expireContinuedAsNewRunsAfter time.Duration) BackendOption { + return func(o *Options) { o.AutoExpirationContinueAsNew = expireContinuedAsNewRunsAfter } } -func WithBackendOptions(opts ...backend.BackendOption) ValkeyBackendOption { - return func(o *ValkeyOptions) { +func WithBackendOptions(opts ...backend.BackendOption) BackendOption { + return func(o *Options) { for _, opt := range opts { opt(o.Options) } diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go index 63356e39..7d24de68 100644 --- a/backend/valkey/queue.go +++ b/backend/valkey/queue.go @@ -38,13 +38,12 @@ type KeyInfo struct { SetKey string } -func newTaskQueue[T any](ctx context.Context, client glide.Client, keyPrefix string, tasktype string, workerName string) (*taskQueue[T], error) { +func newTaskQueue[T any](keyPrefix, tasktype, workerName string) (*taskQueue[T], error) { // Ensure the key prefix ends with a colon if keyPrefix != "" && keyPrefix[len(keyPrefix)-1] != ':' { keyPrefix += ":" } - // Use provided worker name or generate UUID if empty if workerName == "" { workerName = uuid.NewString() } @@ -63,12 +62,8 @@ func newTaskQueue[T any](ctx context.Context, client glide.Client, keyPrefix str func (q *taskQueue[T]) Prepare(ctx context.Context, client glide.Client, queues []workflow.Queue) error { for _, queue := range queues { streamKey := q.Keys(queue).StreamKey - groupName := q.groupName - - // Try to create consumer group - _, err := client.XGroupCreateWithOptions(ctx, streamKey, groupName, "0", options.XGroupCreateOptions{MkStream: true}) - if err != nil { - // Group might already exist, which is fine + if _, err := client.XGroupCreateWithOptions(ctx, streamKey, q.groupName, "0", options.XGroupCreateOptions{MkStream: true}); err != nil { + // Group might already exist, which is fine, consider prepare successful if !strings.Contains(err.Error(), "BUSYGROUP") { return fmt.Errorf("preparing queue %s: %w", queue, err) } @@ -92,7 +87,6 @@ func (q *taskQueue[T]) Size(ctx context.Context, client glide.Client) (map[workf } res := map[workflow.Queue]int64{} - for queueSetKey := range members { size, err := client.SCard(ctx, queueSetKey) if err != nil { @@ -101,9 +95,11 @@ func (q *taskQueue[T]) Size(ctx context.Context, client glide.Client) (map[workf // Parse queue name from key queueName := strings.TrimPrefix(queueSetKey, q.keyPrefix) - queueName = strings.Split(queueName, ":")[1] // queue name is the third part of the key (0-indexed) - - queue := workflow.Queue(queueName) + parts := strings.Split(queueName, ":") // task-set:: + if len(parts) < 3 { + return nil, fmt.Errorf("unexpected set key format: %s", queueSetKey) + } + queue := workflow.Queue(parts[1]) res[queue] = size } @@ -157,10 +153,10 @@ func (q *taskQueue[T]) Dequeue(ctx context.Context, client glide.Client, queues } // Check for new tasks - var keyAndIds map[string]string + keyAndIds := make(map[string]string) for _, queue := range queues { - keys := q.Keys(queue) - keyAndIds[keys.StreamKey] = keys.SetKey + keyInfo := q.Keys(queue) + keyAndIds[keyInfo.StreamKey] = ">" } // Try to dequeue from all given queues @@ -200,10 +196,10 @@ func (q *taskQueue[T]) Extend(ctx context.Context, client glide.Client, queue wo } func (q *taskQueue[T]) Complete(ctx context.Context, client glide.Client, queue workflow.Queue, taskID string) error { - keys := q.Keys(queue) + keyInfo := 
q.Keys(queue) // Get the task to find the ID - msgs, err := client.XRange(ctx, keys.StreamKey, options.NewStreamBoundary(taskID, true), options.NewStreamBoundary(taskID, true)) + msgs, err := client.XRange(ctx, keyInfo.StreamKey, options.NewStreamBoundary(taskID, true), options.NewStreamBoundary(taskID, true)) if err != nil { return fmt.Errorf("completing task: %w", err) } @@ -221,19 +217,19 @@ func (q *taskQueue[T]) Complete(ctx context.Context, client glide.Client, queue } // Remove from set - _, err = client.SRem(ctx, keys.SetKey, []string{id}) + _, err = client.SRem(ctx, keyInfo.SetKey, []string{id}) if err != nil { return fmt.Errorf("completing task: %w", err) } // Acknowledge in consumer group - _, err = client.XAck(ctx, keys.StreamKey, q.groupName, []string{taskID}) + _, err = client.XAck(ctx, keyInfo.StreamKey, q.groupName, []string{taskID}) if err != nil { return fmt.Errorf("completing task: %w", err) } // Delete from stream - _, err = client.XDel(ctx, keys.StreamKey, []string{taskID}) + _, err = client.XDel(ctx, keyInfo.StreamKey, []string{taskID}) if err != nil { return fmt.Errorf("completing task: %w", err) } diff --git a/backend/valkey/queue_test.go b/backend/valkey/queue_test.go new file mode 100644 index 00000000..06640ee1 --- /dev/null +++ b/backend/valkey/queue_test.go @@ -0,0 +1,243 @@ +package valkey + +import ( + "context" + "testing" + "time" + + "github.com/cschleiden/go-workflows/core" + "github.com/cschleiden/go-workflows/workflow" + "github.com/stretchr/testify/assert" + "github.com/valkey-io/valkey-glide/go/v2" + "github.com/valkey-io/valkey-glide/go/v2/config" +) + +func Test_TaskQueue(t *testing.T) { + // These tests rely on a Valkey server on localhost:6379. + // Skip when running with -short. + if testing.Short() { + t.Skip() + } + + taskType := "taskType" + + cfg := config.NewClientConfiguration(). + WithAddress(&config.NodeAddress{ + Host: "localhost", + Port: 6379, + }). + WithDatabaseId(1). 
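+		// Assumes the server was started with --requirepass ValkeyPassw0rd (as in the valkey CI job);
+		// adjust the credentials to match a local setup.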
+ WithCredentials(config.NewServerCredentials("", "ValkeyPassw0rd")) + + // Create client (update the client construction if API changes) + client, err := glide.NewClient(cfg) + assert.NoError(t, err) + t.Cleanup(func() { client.Close() }) + + lockTimeout := time.Millisecond * 10 + blockTimeout := time.Millisecond * 10 + + tests := []struct { + name string + f func(t *testing.T, q *taskQueue[any]) + }{ + { + name: "Simple enqueue/dequeue", + f: func(t *testing.T, q *taskQueue[any]) { + ctx := context.Background() + + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + + task, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + assert.NoError(t, err) + assert.NotNil(t, task) + assert.Equal(t, "t1", task.ID) + }, + }, + { + name: "Size", + f: func(t *testing.T, q *taskQueue[any]) { + ctx := context.Background() + + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + + s1, err := q.Size(ctx, *client) + assert.NoError(t, err) + assert.Equal(t, map[workflow.Queue]int64{workflow.QueueDefault: 1}, s1) + }, + }, + { + name: "Guarantee uniqueness", + f: func(t *testing.T, q *taskQueue[any]) { + ctx := context.Background() + + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + + task, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + assert.NoError(t, err) + assert.NotNil(t, task) + + assert.NoError(t, q.Complete(ctx, *client, workflow.QueueDefault, task.TaskID)) + + // After completion, the same id can be enqueued again + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + }, + }, + { + name: "Store custom data", + f: func(t *testing.T, _ *taskQueue[any]) { + type foo struct { + Count int `json:"count"` + Name string `json:"name"` + } + + ctx := context.Background() + + q, err := newTaskQueue[foo]("prefix", taskType, "") + assert.NoError(t, err) + + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", &foo{ + Count: 1, + Name: "bar", + })) + + task, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + assert.NoError(t, err) + assert.NotNil(t, task) + assert.Equal(t, "t1", task.ID) + assert.Equal(t, 1, task.Data.Count) + assert.Equal(t, "bar", task.Data.Name) + }, + }, + { + name: "Simple enqueue/dequeue different worker", + f: func(t *testing.T, q *taskQueue[any]) { + ctx := context.Background() + + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + + q2, _ := newTaskQueue[any]("prefix", taskType, "") + assert.NoError(t, err) + + // Dequeue using second worker + task, err := q2.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + assert.NoError(t, err) + assert.NotNil(t, task) + assert.Equal(t, "t1", task.ID) + }, + }, + { + name: "Complete removes task", + f: func(t *testing.T, q *taskQueue[any]) { + q2, _ := newTaskQueue[any]("prefix", taskType, "") + + ctx := context.Background() + + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + + task, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + assert.NoError(t, err) + assert.NotNil(t, task) + + // Complete task using second worker + assert.NoError(t, q2.Complete(ctx, *client, workflow.QueueDefault, task.TaskID)) + + time.Sleep(time.Millisecond * 10) + + // 
Try to recover using second worker; should not find anything + task2, err := q2.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + assert.NoError(t, err) + assert.Nil(t, task2) + }, + }, + { + name: "Recover task", + f: func(t *testing.T, _ *taskQueue[any]) { + type taskData struct { + Count int `json:"count"` + } + q, _ := newTaskQueue[taskData]("prefix", taskType, "") + + ctx := context.Background() + + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", &taskData{Count: 42})) + + q2, _ := newTaskQueue[taskData]("prefix", taskType, "") + assert.NoError(t, err) + + task, err := q2.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + assert.NoError(t, err) + assert.NotNil(t, task) + assert.Equal(t, "t1", task.ID) + + time.Sleep(time.Millisecond * 10) + + // Assume q2 crashed, recover from other worker + recoveredTask, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, time.Millisecond*1, blockTimeout) + assert.NoError(t, err) + assert.NotNil(t, recoveredTask) + assert.Equal(t, task, recoveredTask) + }, + }, + { + name: "Extending task prevents recovering", + f: func(t *testing.T, q *taskQueue[any]) { + ctx := context.Background() + + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + + // Create second worker (with different name) + q2, _ := newTaskQueue[any]("prefix", taskType, "") + assert.NoError(t, err) + + task, err := q2.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + assert.NoError(t, err) + assert.NotNil(t, task) + assert.Equal(t, "t1", task.ID) + + time.Sleep(time.Millisecond * 5) + + assert.NoError(t, q2.Extend(ctx, *client, workflow.QueueDefault, task.TaskID)) + + // Use large lock timeout; should not recover + recoveredTask, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, time.Second*2, blockTimeout) + assert.NoError(t, err) + assert.Nil(t, recoveredTask) + }, + }, + { + name: "Will only dequeue from given queue", + f: func(t *testing.T, q *taskQueue[any]) { + ctx := context.Background() + + assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + + assert.NoError(t, q.Prepare(ctx, *client, []workflow.Queue{core.QueueSystem, workflow.QueueDefault})) + + task, err := q.Dequeue(ctx, *client, []workflow.Queue{core.QueueSystem}, lockTimeout, blockTimeout) + assert.NoError(t, err) + assert.Nil(t, task) + + task, err = q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + assert.NoError(t, err) + assert.NotNil(t, task) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + // Best-effort cleanup between tests + _, _ = client.FlushDB(ctx) + + q, err := newTaskQueue[any]("prefix", taskType, "") + assert.NoError(t, err) + + assert.NoError(t, q.Prepare(ctx, *client, []workflow.Queue{workflow.QueueDefault})) + + tt.f(t, q) + }) + } +} diff --git a/backend/valkey/valkey.go b/backend/valkey/valkey.go index a25b4b24..01ffab55 100644 --- a/backend/valkey/valkey.go +++ b/backend/valkey/valkey.go @@ -1,7 +1,6 @@ package valkey import ( - "context" "embed" "fmt" "io/fs" @@ -33,9 +32,9 @@ var ( signalWorkflowScript options.Script ) -func NewValkeyBackend(client glide.Client, opts ...ValkeyBackendOption) (backend.Backend, error) { +func NewValkeyBackend(client glide.Client, opts ...BackendOption) (backend.Backend, error) { // Default options - vopts := 
&ValkeyOptions{ + vopts := &Options{ Options: backend.ApplyOptions(), BlockTimeout: time.Second * 2, } @@ -44,14 +43,12 @@ func NewValkeyBackend(client glide.Client, opts ...ValkeyBackendOption) (backend opt(vopts) } - ctx := context.Background() - - workflowQueue, err := newTaskQueue[workflowData](ctx, client, vopts.KeyPrefix, "workflows", vopts.WorkerName) + workflowQueue, err := newTaskQueue[workflowData](vopts.KeyPrefix, "workflows", vopts.WorkerName) if err != nil { return nil, fmt.Errorf("creating workflow task queue: %w", err) } - activityQueue, err := newTaskQueue[activityData](ctx, client, vopts.KeyPrefix, "activities", vopts.WorkerName) + activityQueue, err := newTaskQueue[activityData](vopts.KeyPrefix, "activities", vopts.WorkerName) if err != nil { return nil, fmt.Errorf("creating activity task queue: %w", err) } @@ -98,7 +95,7 @@ func loadScripts(scriptMapping map[string]*options.Script) error { type valkeyBackend struct { client glide.Client - options *ValkeyOptions + options *Options keys *keys workflowQueue *taskQueue[workflowData] activityQueue *taskQueue[activityData] From 08de4daba4f6619dcfe446248db02015c353c776 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:16 +0100 Subject: [PATCH 03/23] add workflow test for valkey integration --- .github/workflows/go.yml | 40 ++++++++++++++++++++++++++++++++++++++++ .gitignore | 1 - 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 35ffd304..f793ce8c 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -86,6 +86,46 @@ jobs: ${{ github.workspace }}/report.xml if: always() + test_valkey: + runs-on: ubuntu-latest + needs: build + + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: 1.24 + check-latest: true + cache: true + + - name: Start Valkey (Docker) + run: | + docker run -d --name valkey -p 6379:6379 valkey/valkey:latest valkey-server --requirepass ValkeyPassw0rd + + - name: Wait for Valkey readiness + run: | + for i in {1..60}; do + if docker exec valkey valkey-cli -a RedisPassw0rd PING | grep -q PONG; then + echo "Valkey is ready"; exit 0; fi; + sleep 1; + done + echo "Valkey did not become ready in time"; + docker logs valkey || true + exit 1 + + - name: Tests (valkey backend, integration) + run: | + go test -tags=valkey_integration -timeout 240s -race -count 1 -v github.com/cschleiden/go-workflows/backend/valkey 2>&1 | go tool go-junit-report -set-exit-code -iocopy -out "${{ github.workspace }}/report.xml" + + - name: Test Summary + uses: test-summary/action@v2 + with: + paths: | + ${{ github.workspace }}/report.xml + if: always() + test_sqlite: runs-on: ubuntu-latest needs: build diff --git a/.gitignore b/.gitignore index 17f7e9d3..4f21a677 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,3 @@ custom-gcl # Docs build artifacts docs/build/ -.aider* From c487fc77f6aba44690f090956d83a4092e952f66 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:16 +0100 Subject: [PATCH 04/23] change type of NewValkeyBackend to be compatible with signal backend interface --- backend/valkey/valkey.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/valkey/valkey.go b/backend/valkey/valkey.go index 01ffab55..9f1c3ac6 100644 --- a/backend/valkey/valkey.go +++ b/backend/valkey/valkey.go @@ -32,7 +32,7 @@ var ( signalWorkflowScript options.Script ) -func NewValkeyBackend(client glide.Client, opts ...BackendOption) 
(backend.Backend, error) { +func NewValkeyBackend(client glide.Client, opts ...BackendOption) (*valkeyBackend, error) { // Default options vopts := &Options{ Options: backend.ApplyOptions(), From 728e4e9f4b55f88db7be0ccc4198e811533e3bb3 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:16 +0100 Subject: [PATCH 05/23] attempt to solve crossslot problems in valkey clusters when dequeueing --- backend/valkey/queue.go | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go index 7d24de68..bd3e32cf 100644 --- a/backend/valkey/queue.go +++ b/backend/valkey/queue.go @@ -20,6 +20,11 @@ type taskQueue[T any] struct { groupName string workerName string queueSetKey string + // hashTag is a Redis Cluster hash tag ensuring all keys used together + // (across different queues for the same task type) map to the same slot. + // This avoids CrossSlot errors when Valkey is running in clustered/serverless modes + // and XREADGROUP is called on multiple stream keys. + hashTag string } type TaskItem[T any] struct { @@ -48,12 +53,22 @@ func newTaskQueue[T any](keyPrefix, tasktype, workerName string) (*taskQueue[T], workerName = uuid.NewString() } + // Use a stable Redis Cluster hash tag so that all keys for this task type + // hash to the same slot regardless of the specific queue name. Only the + // substring within {...} is used for hashing. + // Example generated keys: + // {task:}:task-stream: + // {task:}:task-set: + // {task:}::queues + hashTag := fmt.Sprintf("{task:%s}", tasktype) + tq := &taskQueue[T]{ keyPrefix: keyPrefix, tasktype: tasktype, groupName: "task-workers", workerName: workerName, - queueSetKey: fmt.Sprintf("%s%s:queues", keyPrefix, tasktype), + queueSetKey: fmt.Sprintf("%s%s:%s:queues", keyPrefix, hashTag, tasktype), + hashTag: hashTag, } return tq, nil @@ -75,8 +90,8 @@ func (q *taskQueue[T]) Prepare(ctx context.Context, client glide.Client, queues func (q *taskQueue[T]) Keys(queue workflow.Queue) KeyInfo { return KeyInfo{ - StreamKey: fmt.Sprintf("%stask-stream:%s:%s", q.keyPrefix, queue, q.tasktype), - SetKey: fmt.Sprintf("%stask-set:%s:%s", q.keyPrefix, queue, q.tasktype), + StreamKey: fmt.Sprintf("%s%s:task-stream:%s", q.keyPrefix, q.hashTag, queue), + SetKey: fmt.Sprintf("%s%s:task-set:%s", q.keyPrefix, q.hashTag, queue), } } @@ -93,13 +108,12 @@ func (q *taskQueue[T]) Size(ctx context.Context, client glide.Client) (map[workf return nil, fmt.Errorf("getting queue size: %w", err) } - // Parse queue name from key - queueName := strings.TrimPrefix(queueSetKey, q.keyPrefix) - parts := strings.Split(queueName, ":") // task-set:: - if len(parts) < 3 { + trimmed := strings.TrimPrefix(queueSetKey, q.keyPrefix) + lastIdx := strings.LastIndex(trimmed, ":") + if lastIdx == -1 || lastIdx == len(trimmed)-1 { return nil, fmt.Errorf("unexpected set key format: %s", queueSetKey) } - queue := workflow.Queue(parts[1]) + queue := workflow.Queue(trimmed[lastIdx+1:]) res[queue] = size } From 87708bcca2d9e34e319e6ae7a5e340c7c75c7cb1 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:16 +0100 Subject: [PATCH 06/23] swap glide SDK for valkey-go SDK --- backend/redis/redis.go | 4 +- backend/valkey/activity.go | 38 ++++----- backend/valkey/delete.go | 22 +++-- backend/valkey/diagnostics.go | 31 ++----- backend/valkey/events_future.go | 9 +- backend/valkey/expire.go | 30 +++---- backend/valkey/instance.go | 109 +++++++++++------------- backend/valkey/queue.go | 142 
++++++++++++++++++-------------- backend/valkey/queue_test.go | 90 ++++++++++---------- backend/valkey/signal.go | 15 +--- backend/valkey/stats.go | 2 +- backend/valkey/valkey.go | 34 ++++---- backend/valkey/workflow.go | 26 +++--- go.mod | 3 +- go.sum | 6 +- 15 files changed, 260 insertions(+), 301 deletions(-) diff --git a/backend/redis/redis.go b/backend/redis/redis.go index 379f4194..dabda50f 100644 --- a/backend/redis/redis.go +++ b/backend/redis/redis.go @@ -19,7 +19,7 @@ import ( var _ backend.Backend = (*redisBackend)(nil) //go:embed scripts -var luaScripts embed.FS +var Luas embed.FS var ( createWorkflowInstanceCmd *redis.Script @@ -85,7 +85,7 @@ func NewRedisBackend(client redis.UniversalClient, opts ...RedisBackendOption) ( func loadScripts(ctx context.Context, rdb redis.UniversalClient, cmdMapping map[string]**redis.Script) error { for scriptFile, cmd := range cmdMapping { - scriptContent, err := fs.ReadFile(luaScripts, "scripts/"+scriptFile) + scriptContent, err := fs.ReadFile(Luas, "scripts/"+scriptFile) if err != nil { return fmt.Errorf("reading Lua script %s: %w", scriptFile, err) } diff --git a/backend/valkey/activity.go b/backend/valkey/activity.go index 6ac726f5..33a9c2b1 100644 --- a/backend/valkey/activity.go +++ b/backend/valkey/activity.go @@ -7,7 +7,6 @@ import ( "github.com/cschleiden/go-workflows/backend" "github.com/cschleiden/go-workflows/backend/history" "github.com/cschleiden/go-workflows/workflow" - "github.com/valkey-io/valkey-glide/go/v2/options" ) func (vb *valkeyBackend) PrepareActivityQueues(ctx context.Context, queues []workflow.Queue) error { @@ -56,26 +55,23 @@ func (vb *valkeyBackend) CompleteActivityTask(ctx context.Context, task *backend activityQueueKeys := vb.activityQueue.Keys(task.Queue) workflowQueueKeys := vb.workflowQueue.Keys(workflow.Queue(instance.Queue)) - _, err = vb.client.InvokeScriptWithOptions(ctx, completeActivityTaskScript, options.ScriptOptions{ - Keys: []string{ - activityQueueKeys.SetKey, - activityQueueKeys.StreamKey, - vb.keys.pendingEventsKey(task.WorkflowInstance), - vb.keys.payloadKey(task.WorkflowInstance), - vb.workflowQueue.queueSetKey, - workflowQueueKeys.SetKey, - workflowQueueKeys.StreamKey, - }, - Args: []string{ - task.ID, - vb.activityQueue.groupName, - result.ID, - eventData, - payload, - vb.workflowQueue.groupName, - instanceSegment(task.WorkflowInstance), - }, - }) + err = completeActivityTaskScript.Exec(ctx, vb.client, []string{ + activityQueueKeys.SetKey, + activityQueueKeys.StreamKey, + vb.keys.pendingEventsKey(task.WorkflowInstance), + vb.keys.payloadKey(task.WorkflowInstance), + vb.workflowQueue.queueSetKey, + workflowQueueKeys.SetKey, + workflowQueueKeys.StreamKey, + }, []string{ + task.ID, + vb.activityQueue.groupName, + result.ID, + eventData, + payload, + vb.workflowQueue.groupName, + instanceSegment(task.WorkflowInstance), + }).Error() if err != nil { return fmt.Errorf("completing activity task: %w", err) diff --git a/backend/valkey/delete.go b/backend/valkey/delete.go index 4cdbbb3a..9e23a450 100644 --- a/backend/valkey/delete.go +++ b/backend/valkey/delete.go @@ -5,7 +5,6 @@ import ( "fmt" "github.com/cschleiden/go-workflows/core" - "github.com/valkey-io/valkey-glide/go/v2/options" ) // deleteInstance deletes an instance from Valkey. It does not attempt to remove any future events or pending @@ -13,17 +12,16 @@ import ( // // Note: might want to revisit this in the future if we want to support removing hung instances. 
func (vb *valkeyBackend) deleteInstance(ctx context.Context, instance *core.WorkflowInstance) error { - _, err := vb.client.InvokeScriptWithOptions(ctx, deleteInstanceScript, options.ScriptOptions{ - Keys: []string{ - vb.keys.instanceKey(instance), - vb.keys.pendingEventsKey(instance), - vb.keys.historyKey(instance), - vb.keys.payloadKey(instance), - vb.keys.activeInstanceExecutionKey(instance.InstanceID), - vb.keys.instancesByCreation(), - }, - Args: []string{instanceSegment(instance)}, - }) + err := deleteInstanceScript.Exec(ctx, vb.client, []string{ + vb.keys.instanceKey(instance), + vb.keys.pendingEventsKey(instance), + vb.keys.historyKey(instance), + vb.keys.payloadKey(instance), + vb.keys.activeInstanceExecutionKey(instance.InstanceID), + vb.keys.instancesByCreation(), + }, []string{ + instanceSegment(instance), + }).Error() if err != nil { return fmt.Errorf("failed to delete instance: %w", err) diff --git a/backend/valkey/diagnostics.go b/backend/valkey/diagnostics.go index 6fc0b575..c9a1dc57 100644 --- a/backend/valkey/diagnostics.go +++ b/backend/valkey/diagnostics.go @@ -8,34 +8,21 @@ import ( "github.com/cschleiden/go-workflows/core" "github.com/cschleiden/go-workflows/diag" "github.com/cschleiden/go-workflows/internal/log" - "github.com/valkey-io/valkey-glide/go/v2/constants" - "github.com/valkey-io/valkey-glide/go/v2/options" ) var _ diag.Backend = (*valkeyBackend)(nil) func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstanceID, afterExecutionID string, count int) ([]*diag.WorkflowInstanceRef, error) { - start := options.NewInclusiveScoreBoundary(0) - end := options.NewInfiniteScoreBoundary(constants.PositiveInfinity) - - zrangeInput := &options.RangeByScore{ - Start: start, - End: end, - Reverse: true, - Limit: &options.Limit{ - Offset: 0, - Count: int64(count), - }, - } + zrangeCmd := vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("0").Max("-1").Rev().Limit(0, int64(count)) if afterInstanceID != "" { afterSegmentID := instanceSegment(core.NewWorkflowInstance(afterInstanceID, afterExecutionID)) - scores, err := vb.client.ZMScore(ctx, vb.keys.instancesByCreation(), []string{afterSegmentID}) + scores, err := vb.client.Do(ctx, vb.client.B().Zscore().Key(vb.keys.instancesByCreation()).Member(afterSegmentID).Build()).AsFloat64() if err != nil { return nil, fmt.Errorf("getting instance score for %v: %w", afterSegmentID, err) } - if len(scores) == 0 { + if scores == 0 { vb.Options().Logger.Error("could not find instance %v", log.NamespaceKey+".valkey.afterInstanceID", afterInstanceID, log.NamespaceKey+".valkey.afterExecutionID", afterExecutionID, @@ -43,11 +30,10 @@ func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstance return nil, nil } - end := options.NewScoreBoundary(scores[0].Value(), false) - zrangeInput.End = end + zrangeCmd = vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("-inf").Max(fmt.Sprintf("(%f", scores)).Rev().Limit(0, int64(count)) } - instanceSegments, err := vb.client.ZRange(ctx, vb.keys.instancesByCreation(), zrangeInput) + instanceSegments, err := vb.client.Do(ctx, zrangeCmd.Build()).AsStrSlice() if err != nil { return nil, fmt.Errorf("getting instances: %w", err) } @@ -61,19 +47,20 @@ func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstance instanceKeys = append(instanceKeys, vb.keys.instanceKeyFromSegment(r)) } - instances, err := vb.client.MGet(ctx, instanceKeys) + cmd := vb.client.B().Mget().Key(instanceKeys...) 
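+	// MGET returns one value per requested key, in order; missing instances come back as empty
+	// strings and are skipped below.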
+ instances, err := vb.client.Do(ctx, cmd.Build()).AsStrSlice() if err != nil { return nil, fmt.Errorf("getting instances: %w", err) } instanceRefs := make([]*diag.WorkflowInstanceRef, 0, len(instances)) for _, instance := range instances { - if instance.IsNil() { + if instance == "" { continue } var state instanceState - if err := json.Unmarshal([]byte(instance.Value()), &state); err != nil { + if err := json.Unmarshal([]byte(instance), &state); err != nil { return nil, fmt.Errorf("unmarshaling instance state: %w", err) } diff --git a/backend/valkey/events_future.go b/backend/valkey/events_future.go index 1dd5ab95..d348f6a3 100644 --- a/backend/valkey/events_future.go +++ b/backend/valkey/events_future.go @@ -5,19 +5,12 @@ import ( "fmt" "strconv" "time" - - "github.com/valkey-io/valkey-glide/go/v2/options" ) func scheduleFutureEvents(ctx context.Context, vb *valkeyBackend) error { now := time.Now().UnixMilli() nowStr := strconv.FormatInt(now, 10) - _, err := vb.client.InvokeScriptWithOptions(ctx, futureEventsScript, options.ScriptOptions{ - Keys: []string{ - vb.keys.futureEventsKey(), - }, - Args: []string{nowStr, vb.keys.prefix}, - }) + err := futureEventsScript.Exec(ctx, vb.client, []string{vb.keys.futureEventsKey()}, []string{nowStr, vb.keys.prefix}).Error() if err != nil { return fmt.Errorf("checking future events: %w", err) diff --git a/backend/valkey/expire.go b/backend/valkey/expire.go index ce167caf..27f362ec 100644 --- a/backend/valkey/expire.go +++ b/backend/valkey/expire.go @@ -7,7 +7,6 @@ import ( "time" "github.com/cschleiden/go-workflows/core" - "github.com/valkey-io/valkey-glide/go/v2/options" ) func (vb *valkeyBackend) setWorkflowInstanceExpiration(ctx context.Context, instance *core.WorkflowInstance, expiration time.Duration) error { @@ -17,22 +16,19 @@ func (vb *valkeyBackend) setWorkflowInstanceExpiration(ctx context.Context, inst exp := time.Now().Add(expiration).UnixMilli() expStr := strconv.FormatInt(exp, 10) - _, err := vb.client.InvokeScriptWithOptions(ctx, expireWorkflowInstanceScript, options.ScriptOptions{ - Keys: []string{ - vb.keys.instancesByCreation(), - vb.keys.instancesExpiring(), - vb.keys.instanceKey(instance), - vb.keys.pendingEventsKey(instance), - vb.keys.historyKey(instance), - vb.keys.payloadKey(instance), - }, - Args: []string{ - nowStr, - fmt.Sprintf("%.0f", expiration.Seconds()), - expStr, - instanceSegment(instance), - }, - }) + err := expireWorkflowInstanceScript.Exec(ctx, vb.client, []string{ + vb.keys.instancesByCreation(), + vb.keys.instancesExpiring(), + vb.keys.instanceKey(instance), + vb.keys.pendingEventsKey(instance), + vb.keys.historyKey(instance), + vb.keys.payloadKey(instance), + }, []string{ + nowStr, + fmt.Sprintf("%.0f", expiration.Seconds()), + expStr, + instanceSegment(instance), + }).Error() return err } diff --git a/backend/valkey/instance.go b/backend/valkey/instance.go index dbcd37d6..c0846f9f 100644 --- a/backend/valkey/instance.go +++ b/backend/valkey/instance.go @@ -12,9 +12,7 @@ import ( "github.com/cschleiden/go-workflows/backend/metadata" "github.com/cschleiden/go-workflows/core" "github.com/cschleiden/go-workflows/workflow" - "github.com/valkey-io/valkey-glide/go/v2" - "github.com/valkey-io/valkey-glide/go/v2/constants" - "github.com/valkey-io/valkey-glide/go/v2/options" + "github.com/valkey-io/valkey-go" ) func (vb *valkeyBackend) CreateWorkflowInstance(ctx context.Context, instance *workflow.Instance, event *history.Event) error { @@ -44,28 +42,25 @@ func (vb *valkeyBackend) CreateWorkflowInstance(ctx 
context.Context, instance *w keyInfo := vb.workflowQueue.Keys(a.Queue) // Execute Lua script for atomic creation - result, err := vb.client.InvokeScriptWithOptions(ctx, createWorkflowInstanceScript, options.ScriptOptions{ - Keys: []string{ - vb.keys.instanceKey(instance), - vb.keys.activeInstanceExecutionKey(instance.InstanceID), - vb.keys.pendingEventsKey(instance), - vb.keys.payloadKey(instance), - vb.keys.instancesActive(), - vb.keys.instancesByCreation(), - keyInfo.SetKey, - keyInfo.StreamKey, - vb.workflowQueue.queueSetKey, - }, - Args: []string{ - instanceSegment(instance), - string(instanceState), - string(activeInstance), - event.ID, - eventData, - payloadData, - fmt.Sprintf("%d", time.Now().UTC().UnixNano()), - }, - }) + err = createWorkflowInstanceScript.Exec(ctx, vb.client, []string{ + vb.keys.instanceKey(instance), + vb.keys.activeInstanceExecutionKey(instance.InstanceID), + vb.keys.pendingEventsKey(instance), + vb.keys.payloadKey(instance), + vb.keys.instancesActive(), + vb.keys.instancesByCreation(), + keyInfo.SetKey, + keyInfo.StreamKey, + vb.workflowQueue.queueSetKey, + }, []string{ + instanceSegment(instance), + string(instanceState), + string(activeInstance), + event.ID, + eventData, + payloadData, + fmt.Sprintf("%d", time.Now().UTC().UnixNano()), + }).Error() if err != nil { if err.Error() == "ERR InstanceAlreadyExists" { @@ -74,20 +69,16 @@ func (vb *valkeyBackend) CreateWorkflowInstance(ctx context.Context, instance *w return fmt.Errorf("creating workflow instance: %w", err) } - if result == nil { - return fmt.Errorf("unexpected nil result from create workflow instance script") - } - return nil } func (vb *valkeyBackend) GetWorkflowInstanceHistory(ctx context.Context, instance *core.WorkflowInstance, lastSequenceID *int64) ([]*history.Event, error) { - boundary := options.NewInfiniteStreamBoundary(constants.NegativeInfinity) + start := "-" if lastSequenceID != nil { - boundary = options.NewStreamBoundary(strconv.FormatInt(*lastSequenceID, 10), false) + start = strconv.FormatInt(*lastSequenceID, 10) } - msgs, err := vb.client.XRange(ctx, vb.keys.historyKey(instance), boundary, "+") + msgs, err := vb.client.Do(ctx, vb.client.B().Xrange().Key(vb.keys.historyKey(instance)).Start(start).End("+").Build()).AsXRange() if err != nil { return nil, err } @@ -95,14 +86,8 @@ func (vb *valkeyBackend) GetWorkflowInstanceHistory(ctx context.Context, instanc payloadKeys := make([]string, 0, len(msgs)) events := make([]*history.Event, 0, len(msgs)) for _, msg := range msgs { - var eventStr string - for _, field := range msg.Fields { - if field.Field == "event" { - eventStr = field.Value - break - } - } - if eventStr == "" { + eventStr, ok := msg.FieldValues["event"] + if !ok || eventStr == "" { continue } @@ -116,13 +101,14 @@ func (vb *valkeyBackend) GetWorkflowInstanceHistory(ctx context.Context, instanc } if len(payloadKeys) > 0 { - res, err := vb.client.HMGet(ctx, vb.keys.payloadKey(instance), payloadKeys) + cmd := vb.client.B().Hmget().Key(vb.keys.payloadKey(instance)).Field(payloadKeys...) 
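+		// HMGET preserves the order of the requested fields, so res[i] is the payload for events[i].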
+ res, err := vb.client.Do(ctx, cmd.Build()).AsStrSlice() if err != nil { return nil, fmt.Errorf("reading payloads: %w", err) } for i, event := range events { - event.Attributes, err = history.DeserializeAttributes(event.Type, []byte(res[i].Value())) + event.Attributes, err = history.DeserializeAttributes(event.Type, []byte(res[i])) if err != nil { return nil, fmt.Errorf("deserializing attributes for event %v: %w", event.Type, err) } @@ -157,20 +143,17 @@ func (vb *valkeyBackend) CancelWorkflowInstance(ctx context.Context, instance *c keyInfo := vb.workflowQueue.Keys(workflow.Queue(instanceState.Queue)) // Cancel instance - _, err = vb.client.InvokeScriptWithOptions(ctx, cancelWorkflowInstanceScript, options.ScriptOptions{ - Keys: []string{ - vb.keys.payloadKey(instance), - vb.keys.pendingEventsKey(instance), - keyInfo.SetKey, - keyInfo.StreamKey, - }, - Args: []string{ - event.ID, - eventData, - payloadData, - instanceSegment(instance), - }, - }) + err = cancelWorkflowInstanceScript.Exec(ctx, vb.client, []string{ + vb.keys.payloadKey(instance), + vb.keys.pendingEventsKey(instance), + keyInfo.SetKey, + keyInfo.StreamKey, + }, []string{ + event.ID, + eventData, + payloadData, + instanceSegment(instance), + }).Error() if err != nil { return fmt.Errorf("canceling workflow instance: %w", err) @@ -212,18 +195,18 @@ type instanceState struct { LastSequenceID int64 `json:"last_sequence_id,omitempty"` } -func readInstance(ctx context.Context, client glide.Client, instanceKey string) (*instanceState, error) { - val, err := client.Get(ctx, instanceKey) +func readInstance(ctx context.Context, client valkey.Client, instanceKey string) (*instanceState, error) { + val, err := client.Do(ctx, client.B().Get().Key(instanceKey).Build()).ToString() if err != nil { return nil, fmt.Errorf("reading instance: %w", err) } - if val.IsNil() { + if val == "" { return nil, backend.ErrInstanceNotFound } var state instanceState - if err := json.Unmarshal([]byte(val.Value()), &state); err != nil { + if err := json.Unmarshal([]byte(val), &state); err != nil { return nil, fmt.Errorf("unmarshaling instance state: %w", err) } @@ -231,17 +214,17 @@ func readInstance(ctx context.Context, client glide.Client, instanceKey string) } func (vb *valkeyBackend) readActiveInstanceExecution(ctx context.Context, instanceID string) (*core.WorkflowInstance, error) { - val, err := vb.client.Get(ctx, vb.keys.activeInstanceExecutionKey(instanceID)) + val, err := vb.client.Do(ctx, vb.client.B().Get().Key(vb.keys.activeInstanceExecutionKey(instanceID)).Build()).ToString() if err != nil { return nil, err } - if val.IsNil() { + if val == "" { return nil, nil } var instance *core.WorkflowInstance - if err := json.Unmarshal([]byte(val.Value()), &instance); err != nil { + if err := json.Unmarshal([]byte(val), &instance); err != nil { return nil, fmt.Errorf("unmarshaling instance: %w", err) } diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go index bd3e32cf..8bc4719f 100644 --- a/backend/valkey/queue.go +++ b/backend/valkey/queue.go @@ -4,14 +4,13 @@ import ( "context" "encoding/json" "fmt" + "strconv" "strings" "time" "github.com/cschleiden/go-workflows/workflow" "github.com/google/uuid" - "github.com/valkey-io/valkey-glide/go/v2" - "github.com/valkey-io/valkey-glide/go/v2/models" - "github.com/valkey-io/valkey-glide/go/v2/options" + "github.com/valkey-io/valkey-go" ) type taskQueue[T any] struct { @@ -20,7 +19,7 @@ type taskQueue[T any] struct { groupName string workerName string queueSetKey string - // hashTag is a Redis Cluster hash 
tag ensuring all keys used together + // hashTag is a Valkey Cluster hash tag ensuring all keys used together // (across different queues for the same task type) map to the same slot. // This avoids CrossSlot errors when Valkey is running in clustered/serverless modes // and XREADGROUP is called on multiple stream keys. @@ -53,7 +52,7 @@ func newTaskQueue[T any](keyPrefix, tasktype, workerName string) (*taskQueue[T], workerName = uuid.NewString() } - // Use a stable Redis Cluster hash tag so that all keys for this task type + // Use a stable Valkey Cluster hash tag so that all keys for this task type // hash to the same slot regardless of the specific queue name. Only the // substring within {...} is used for hashing. // Example generated keys: @@ -74,10 +73,11 @@ func newTaskQueue[T any](keyPrefix, tasktype, workerName string) (*taskQueue[T], return tq, nil } -func (q *taskQueue[T]) Prepare(ctx context.Context, client glide.Client, queues []workflow.Queue) error { +func (q *taskQueue[T]) Prepare(ctx context.Context, client valkey.Client, queues []workflow.Queue) error { for _, queue := range queues { streamKey := q.Keys(queue).StreamKey - if _, err := client.XGroupCreateWithOptions(ctx, streamKey, q.groupName, "0", options.XGroupCreateOptions{MkStream: true}); err != nil { + err := client.Do(ctx, client.B().XgroupCreate().Key(streamKey).Group(q.groupName).Id("0").Mkstream().Build()).Error() + if err != nil { // Group might already exist, which is fine, consider prepare successful if !strings.Contains(err.Error(), "BUSYGROUP") { return fmt.Errorf("preparing queue %s: %w", queue, err) @@ -95,15 +95,15 @@ func (q *taskQueue[T]) Keys(queue workflow.Queue) KeyInfo { } } -func (q *taskQueue[T]) Size(ctx context.Context, client glide.Client) (map[workflow.Queue]int64, error) { - members, err := client.SMembers(ctx, q.queueSetKey) +func (q *taskQueue[T]) Size(ctx context.Context, client valkey.Client) (map[workflow.Queue]int64, error) { + members, err := client.Do(ctx, client.B().Smembers().Key(q.queueSetKey).Build()).AsStrSlice() if err != nil { return nil, fmt.Errorf("getting queue size: %w", err) } res := map[workflow.Queue]int64{} - for queueSetKey := range members { - size, err := client.SCard(ctx, queueSetKey) + for _, queueSetKey := range members { + size, err := client.Do(ctx, client.B().Scard().Key(queueSetKey).Build()).AsInt64() if err != nil { return nil, fmt.Errorf("getting queue size: %w", err) } @@ -120,7 +120,7 @@ func (q *taskQueue[T]) Size(ctx context.Context, client glide.Client) (map[workf return res, nil } -func (q *taskQueue[T]) Enqueue(ctx context.Context, client glide.Client, queue workflow.Queue, id string, data *T) error { +func (q *taskQueue[T]) Enqueue(ctx context.Context, client valkey.Client, queue workflow.Queue, id string, data *T) error { ds, err := json.Marshal(data) if err != nil { return err @@ -129,24 +129,20 @@ func (q *taskQueue[T]) Enqueue(ctx context.Context, client glide.Client, queue w keys := q.Keys(queue) // Add to set to track uniqueness - _, err = client.SAdd(ctx, q.queueSetKey, []string{keys.SetKey}) + err = client.Do(ctx, client.B().Sadd().Key(q.queueSetKey).Member(keys.SetKey).Build()).Error() if err != nil { return err } // Add to set for this queue - added, err := client.SAdd(ctx, keys.SetKey, []string{id}) + added, err := client.Do(ctx, client.B().Sadd().Key(keys.SetKey).Member(id).Build()).AsInt64() if err != nil { return err } // Only add to stream if it's a new task if added > 0 { - var fieldValues []models.FieldValue - fieldValues = 
append(fieldValues, models.FieldValue{Field: "id", Value: id}) - fieldValues = append(fieldValues, models.FieldValue{Field: "data", Value: string(ds)}) - - _, err = client.XAdd(ctx, keys.StreamKey, fieldValues) + err = client.Do(ctx, client.B().Xadd().Key(keys.StreamKey).Id("*").FieldValue().FieldValue("id", id).FieldValue("data", string(ds)).Build()).Error() if err != nil { return err } @@ -155,7 +151,7 @@ func (q *taskQueue[T]) Enqueue(ctx context.Context, client glide.Client, queue w return nil } -func (q *taskQueue[T]) Dequeue(ctx context.Context, client glide.Client, queues []workflow.Queue, lockTimeout, timeout time.Duration) (*TaskItem[T], error) { +func (q *taskQueue[T]) Dequeue(ctx context.Context, client valkey.Client, queues []workflow.Queue, lockTimeout, timeout time.Duration) (*TaskItem[T], error) { // Try to recover abandoned tasks task, err := q.recover(ctx, client, queues, lockTimeout) if err != nil { @@ -167,19 +163,20 @@ func (q *taskQueue[T]) Dequeue(ctx context.Context, client glide.Client, queues } // Check for new tasks - keyAndIds := make(map[string]string) + streamKeys := make([]string, 0, len(queues)) for _, queue := range queues { keyInfo := q.Keys(queue) - keyAndIds[keyInfo.StreamKey] = ">" + streamKeys = append(streamKeys, keyInfo.StreamKey) } // Try to dequeue from all given queues - results, err := client.XReadGroupWithOptions(ctx, q.groupName, q.workerName, keyAndIds, options.XReadGroupOptions{ - Count: 1, - Block: timeout, - }) - + cmd := client.B().Xreadgroup().Group(q.groupName, q.workerName).Streams().Key(streamKeys...).Id(">") + results, err := client.Do(ctx, cmd.Build()).AsXRead() if err != nil { + // Check if error is due to no data available (nil response) + if valkey.IsValkeyNil(err) { + return nil, nil + } return nil, fmt.Errorf("error dequeueing task: %w", err) } @@ -187,34 +184,40 @@ func (q *taskQueue[T]) Dequeue(ctx context.Context, client glide.Client, queues return nil, nil } - // Get the first entry to dequeue - var entry models.StreamEntry - for _, response := range results { - if len(response.Entries) > 0 { - entry = response.Entries[0] - break + // Get the first entry from the first stream + for _, streamResult := range results { + if len(streamResult) > 0 { + return msgToTaskItem[T](streamResult[0]) } } - return msgToTaskItem[T](entry) + return nil, nil } -func (q *taskQueue[T]) Extend(ctx context.Context, client glide.Client, queue workflow.Queue, taskID string) error { +func (q *taskQueue[T]) Extend(ctx context.Context, client valkey.Client, queue workflow.Queue, taskID string) error { // Claiming a message resets the idle timer - _, err := client.XClaim(ctx, q.Keys(queue).StreamKey, q.groupName, q.workerName, 0, []string{taskID}) + err := client.Do(ctx, client.B().Xclaim().Key(q.Keys(queue).StreamKey).Group(q.groupName).Consumer(q.workerName).MinIdleTime("0").Id(taskID).Build()).Error() if err != nil { + // Check if error is due to no data available (nil response) + if valkey.IsValkeyNil(err) { + return nil + } return fmt.Errorf("extending lease: %w", err) } return nil } -func (q *taskQueue[T]) Complete(ctx context.Context, client glide.Client, queue workflow.Queue, taskID string) error { +func (q *taskQueue[T]) Complete(ctx context.Context, client valkey.Client, queue workflow.Queue, taskID string) error { keyInfo := q.Keys(queue) // Get the task to find the ID - msgs, err := client.XRange(ctx, keyInfo.StreamKey, options.NewStreamBoundary(taskID, true), options.NewStreamBoundary(taskID, true)) + msgs, err := client.Do(ctx, 
client.B().Xrange().Key(keyInfo.StreamKey).Start(taskID).End(taskID).Build()).AsXRange() if err != nil { + // Check if error is due to no data available (nil response) + if valkey.IsValkeyNil(err) { + return nil + } return fmt.Errorf("completing task: %w", err) } @@ -223,27 +226,25 @@ func (q *taskQueue[T]) Complete(ctx context.Context, client glide.Client, queue } msg := msgs[0] - var id string - for _, field := range msg.Fields { - if field.Field == "id" { - id = field.Value - } + id, ok := msg.FieldValues["id"] + if !ok { + return fmt.Errorf("completing task: missing id field") } // Remove from set - _, err = client.SRem(ctx, keyInfo.SetKey, []string{id}) + err = client.Do(ctx, client.B().Srem().Key(keyInfo.SetKey).Member(id).Build()).Error() if err != nil { return fmt.Errorf("completing task: %w", err) } // Acknowledge in consumer group - _, err = client.XAck(ctx, keyInfo.StreamKey, q.groupName, []string{taskID}) + err = client.Do(ctx, client.B().Xack().Key(keyInfo.StreamKey).Group(q.groupName).Id(taskID).Build()).Error() if err != nil { return fmt.Errorf("completing task: %w", err) } // Delete from stream - _, err = client.XDel(ctx, keyInfo.StreamKey, []string{taskID}) + err = client.Do(ctx, client.B().Xdel().Key(keyInfo.StreamKey).Id(taskID).Build()).Error() if err != nil { return fmt.Errorf("completing task: %w", err) } @@ -251,41 +252,62 @@ func (q *taskQueue[T]) Complete(ctx context.Context, client glide.Client, queue return nil } -func (q *taskQueue[T]) recover(ctx context.Context, client glide.Client, queues []workflow.Queue, idleTimeout time.Duration) (*TaskItem[T], error) { +func (q *taskQueue[T]) recover(ctx context.Context, client valkey.Client, queues []workflow.Queue, idleTimeout time.Duration) (*TaskItem[T], error) { for _, queue := range queues { streamKey := q.Keys(queue).StreamKey // Try to recover abandoned tasks - msgs, err := client.XAutoClaimWithOptions(ctx, streamKey, q.groupName, q.workerName, idleTimeout, "0", options.XAutoClaimOptions{Count: 1}) + cmd := client.B().Xautoclaim().Key(streamKey).Group(q.groupName).Consumer(q.workerName).MinIdleTime(strconv.FormatInt(idleTimeout.Milliseconds(), 10)).Start("0").Count(1) + msgs, err := client.Do(ctx, cmd.Build()).ToArray() if err != nil { + // Check if error is due to no data available (nil response) + if valkey.IsValkeyNil(err) { + continue + } return nil, fmt.Errorf("recovering abandoned task: %w", err) } - if len(msgs.ClaimedEntries) > 0 { - return msgToTaskItem[T](msgs.ClaimedEntries[0]) + if len(msgs) >= 2 { + entries, _ := msgs[1].ToArray() + for _, entry := range entries { + arr, _ := entry.ToArray() + if len(arr) == 2 { + id, _ := arr[0].ToString() + fieldsArr, _ := arr[1].ToArray() + fieldValues := map[string]string{} + for i := 0; i+1 < len(fieldsArr); i += 2 { + key, _ := fieldsArr[i].ToString() + val, _ := fieldsArr[i+1].ToString() + fieldValues[key] = val + } + xEntry := valkey.XRangeEntry{ + ID: id, + FieldValues: fieldValues, + } + return msgToTaskItem[T](xEntry) + } + } } } return nil, nil } -func msgToTaskItem[T any](msg models.StreamEntry) (*TaskItem[T], error) { - var id, data string - for _, field := range msg.Fields { - if field.Field == "id" { - id = field.Value - } else if field.Field == "data" { - data = field.Value - } - } +func msgToTaskItem[T any](msg valkey.XRangeEntry) (*TaskItem[T], error) { + id, idOk := msg.FieldValues["id"] + data, dataOk := msg.FieldValues["data"] var t T - if data != "" { + if dataOk && data != "" { if err := json.Unmarshal([]byte(data), &t); err != nil { return 
nil, err } } + if !idOk { + return nil, fmt.Errorf("message missing id field") + } + return &TaskItem[T]{ TaskID: msg.ID, ID: id, diff --git a/backend/valkey/queue_test.go b/backend/valkey/queue_test.go index 06640ee1..156c12a7 100644 --- a/backend/valkey/queue_test.go +++ b/backend/valkey/queue_test.go @@ -8,8 +8,7 @@ import ( "github.com/cschleiden/go-workflows/core" "github.com/cschleiden/go-workflows/workflow" "github.com/stretchr/testify/assert" - "github.com/valkey-io/valkey-glide/go/v2" - "github.com/valkey-io/valkey-glide/go/v2/config" + "github.com/valkey-io/valkey-go" ) func Test_TaskQueue(t *testing.T) { @@ -21,16 +20,11 @@ func Test_TaskQueue(t *testing.T) { taskType := "taskType" - cfg := config.NewClientConfiguration(). - WithAddress(&config.NodeAddress{ - Host: "localhost", - Port: 6379, - }). - WithDatabaseId(1). - WithCredentials(config.NewServerCredentials("", "ValkeyPassw0rd")) - - // Create client (update the client construction if API changes) - client, err := glide.NewClient(cfg) + client, err := valkey.NewClient(valkey.ClientOption{ + InitAddress: []string{"localhost:6379"}, + Password: "ValkeyPassw0rd", + SelectDB: 0, + }) assert.NoError(t, err) t.Cleanup(func() { client.Close() }) @@ -46,9 +40,9 @@ func Test_TaskQueue(t *testing.T) { f: func(t *testing.T, q *taskQueue[any]) { ctx := context.Background() - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", nil)) - task, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + task, err := q.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) assert.NoError(t, err) assert.NotNil(t, task) assert.Equal(t, "t1", task.ID) @@ -59,9 +53,9 @@ func Test_TaskQueue(t *testing.T) { f: func(t *testing.T, q *taskQueue[any]) { ctx := context.Background() - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", nil)) - s1, err := q.Size(ctx, *client) + s1, err := q.Size(ctx, client) assert.NoError(t, err) assert.Equal(t, map[workflow.Queue]int64{workflow.QueueDefault: 1}, s1) }, @@ -71,17 +65,17 @@ func Test_TaskQueue(t *testing.T) { f: func(t *testing.T, q *taskQueue[any]) { ctx := context.Background() - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", nil)) - task, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + task, err := q.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) assert.NoError(t, err) assert.NotNil(t, task) - assert.NoError(t, q.Complete(ctx, *client, workflow.QueueDefault, task.TaskID)) + assert.NoError(t, q.Complete(ctx, client, workflow.QueueDefault, task.TaskID)) // After completion, the same id can be enqueued again - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", nil)) }, }, { @@ -97,12 +91,12 @@ func Test_TaskQueue(t *testing.T) { q, err := newTaskQueue[foo]("prefix", taskType, "") assert.NoError(t, err) - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", &foo{ + 
assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", &foo{ Count: 1, Name: "bar", })) - task, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + task, err := q.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) assert.NoError(t, err) assert.NotNil(t, task) assert.Equal(t, "t1", task.ID) @@ -115,13 +109,13 @@ func Test_TaskQueue(t *testing.T) { f: func(t *testing.T, q *taskQueue[any]) { ctx := context.Background() - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", nil)) - q2, _ := newTaskQueue[any]("prefix", taskType, "") + q2, err := newTaskQueue[any]("prefix", taskType, "") assert.NoError(t, err) // Dequeue using second worker - task, err := q2.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + task, err := q2.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) assert.NoError(t, err) assert.NotNil(t, task) assert.Equal(t, "t1", task.ID) @@ -130,23 +124,24 @@ func Test_TaskQueue(t *testing.T) { { name: "Complete removes task", f: func(t *testing.T, q *taskQueue[any]) { - q2, _ := newTaskQueue[any]("prefix", taskType, "") + q2, err := newTaskQueue[any]("prefix", taskType, "") + assert.NoError(t, err) ctx := context.Background() - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", nil)) - task, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + task, err := q.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) assert.NoError(t, err) assert.NotNil(t, task) // Complete task using second worker - assert.NoError(t, q2.Complete(ctx, *client, workflow.QueueDefault, task.TaskID)) + assert.NoError(t, q2.Complete(ctx, client, workflow.QueueDefault, task.TaskID)) time.Sleep(time.Millisecond * 10) // Try to recover using second worker; should not find anything - task2, err := q2.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + task2, err := q2.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) assert.NoError(t, err) assert.Nil(t, task2) }, @@ -157,16 +152,17 @@ func Test_TaskQueue(t *testing.T) { type taskData struct { Count int `json:"count"` } - q, _ := newTaskQueue[taskData]("prefix", taskType, "") + q, err := newTaskQueue[taskData]("prefix", taskType, "") + assert.NoError(t, err) ctx := context.Background() - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", &taskData{Count: 42})) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", &taskData{Count: 42})) - q2, _ := newTaskQueue[taskData]("prefix", taskType, "") + q2, err := newTaskQueue[taskData]("prefix", taskType, "") assert.NoError(t, err) - task, err := q2.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + task, err := q2.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) assert.NoError(t, err) assert.NotNil(t, task) assert.Equal(t, "t1", task.ID) @@ -174,7 +170,7 @@ func Test_TaskQueue(t *testing.T) { time.Sleep(time.Millisecond * 10) // Assume q2 crashed, recover from other worker - recoveredTask, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, 
time.Millisecond*1, blockTimeout) + recoveredTask, err := q.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, time.Millisecond*1, blockTimeout) assert.NoError(t, err) assert.NotNil(t, recoveredTask) assert.Equal(t, task, recoveredTask) @@ -185,23 +181,23 @@ func Test_TaskQueue(t *testing.T) { f: func(t *testing.T, q *taskQueue[any]) { ctx := context.Background() - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", nil)) // Create second worker (with different name) - q2, _ := newTaskQueue[any]("prefix", taskType, "") + q2, err := newTaskQueue[any]("prefix", taskType, "") assert.NoError(t, err) - task, err := q2.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + task, err := q2.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) assert.NoError(t, err) assert.NotNil(t, task) assert.Equal(t, "t1", task.ID) time.Sleep(time.Millisecond * 5) - assert.NoError(t, q2.Extend(ctx, *client, workflow.QueueDefault, task.TaskID)) + assert.NoError(t, q2.Extend(ctx, client, workflow.QueueDefault, task.TaskID)) // Use large lock timeout; should not recover - recoveredTask, err := q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, time.Second*2, blockTimeout) + recoveredTask, err := q.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, time.Second*2, blockTimeout) assert.NoError(t, err) assert.Nil(t, recoveredTask) }, @@ -211,15 +207,15 @@ func Test_TaskQueue(t *testing.T) { f: func(t *testing.T, q *taskQueue[any]) { ctx := context.Background() - assert.NoError(t, q.Enqueue(ctx, *client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", nil)) - assert.NoError(t, q.Prepare(ctx, *client, []workflow.Queue{core.QueueSystem, workflow.QueueDefault})) + assert.NoError(t, q.Prepare(ctx, client, []workflow.Queue{core.QueueSystem, workflow.QueueDefault})) - task, err := q.Dequeue(ctx, *client, []workflow.Queue{core.QueueSystem}, lockTimeout, blockTimeout) + task, err := q.Dequeue(ctx, client, []workflow.Queue{core.QueueSystem}, lockTimeout, blockTimeout) assert.NoError(t, err) assert.Nil(t, task) - task, err = q.Dequeue(ctx, *client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) + task, err = q.Dequeue(ctx, client, []workflow.Queue{workflow.QueueDefault}, lockTimeout, blockTimeout) assert.NoError(t, err) assert.NotNil(t, task) }, @@ -230,12 +226,12 @@ func Test_TaskQueue(t *testing.T) { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() // Best-effort cleanup between tests - _, _ = client.FlushDB(ctx) + client.Do(ctx, client.B().Flushdb().Build()) q, err := newTaskQueue[any]("prefix", taskType, "") assert.NoError(t, err) - assert.NoError(t, q.Prepare(ctx, *client, []workflow.Queue{workflow.QueueDefault})) + assert.NoError(t, q.Prepare(ctx, client, []workflow.Queue{workflow.QueueDefault})) tt.f(t, q) }) diff --git a/backend/valkey/signal.go b/backend/valkey/signal.go index aae1f230..d81b5787 100644 --- a/backend/valkey/signal.go +++ b/backend/valkey/signal.go @@ -7,7 +7,6 @@ import ( "github.com/cschleiden/go-workflows/backend" "github.com/cschleiden/go-workflows/backend/history" "github.com/cschleiden/go-workflows/workflow" - "github.com/valkey-io/valkey-glide/go/v2/options" ) func (vb *valkeyBackend) SignalWorkflow(ctx context.Context, instanceID string, event *history.Event) error { @@ -34,25 +33,19 @@ func (vb 
*valkeyBackend) SignalWorkflow(ctx context.Context, instanceID string, queue := workflow.Queue(instanceState.Queue) queueKeys := vb.workflowQueue.Keys(queue) - keys := []string{ + // Execute the Lua script + err = signalWorkflowScript.Exec(ctx, vb.client, []string{ vb.keys.payloadKey(instanceState.Instance), vb.keys.pendingEventsKey(instanceState.Instance), queueKeys.SetKey, queueKeys.StreamKey, - } - - args := []string{ + }, []string{ event.ID, eventData, payload, instanceSegment(instanceState.Instance), - } + }).Error() - // Execute the Lua script - _, err = vb.client.InvokeScriptWithOptions(ctx, signalWorkflowScript, options.ScriptOptions{ - Keys: keys, - Args: args, - }) if err != nil { return fmt.Errorf("signaling workflow: %w", err) } diff --git a/backend/valkey/stats.go b/backend/valkey/stats.go index 9246da40..27a1db69 100644 --- a/backend/valkey/stats.go +++ b/backend/valkey/stats.go @@ -11,7 +11,7 @@ func (vb *valkeyBackend) GetStats(ctx context.Context) (*backend.Stats, error) { s := &backend.Stats{} // get workflow instances - activeInstances, err := vb.client.SCard(ctx, vb.keys.instancesActive()) + activeInstances, err := vb.client.Do(ctx, vb.client.B().Scard().Key(vb.keys.instancesActive()).Build()).AsInt64() if err != nil { return nil, fmt.Errorf("getting active instances: %w", err) } diff --git a/backend/valkey/valkey.go b/backend/valkey/valkey.go index 9f1c3ac6..9eeefce3 100644 --- a/backend/valkey/valkey.go +++ b/backend/valkey/valkey.go @@ -11,29 +11,27 @@ import ( "github.com/cschleiden/go-workflows/backend/metrics" "github.com/cschleiden/go-workflows/core" "github.com/cschleiden/go-workflows/internal/metrickeys" - "github.com/valkey-io/valkey-glide/go/v2" - "github.com/valkey-io/valkey-glide/go/v2/options" + "github.com/valkey-io/valkey-go" "go.opentelemetry.io/otel/trace" ) var _ backend.Backend = (*valkeyBackend)(nil) //go:embed scripts -var luaScripts embed.FS +var Luas embed.FS var ( - createWorkflowInstanceScript options.Script - completeWorkflowTaskScript options.Script - completeActivityTaskScript options.Script - deleteInstanceScript options.Script - futureEventsScript options.Script - expireWorkflowInstanceScript options.Script - cancelWorkflowInstanceScript options.Script - signalWorkflowScript options.Script + createWorkflowInstanceScript *valkey.Lua + completeWorkflowTaskScript *valkey.Lua + completeActivityTaskScript *valkey.Lua + deleteInstanceScript *valkey.Lua + futureEventsScript *valkey.Lua + expireWorkflowInstanceScript *valkey.Lua + cancelWorkflowInstanceScript *valkey.Lua + signalWorkflowScript *valkey.Lua ) -func NewValkeyBackend(client glide.Client, opts ...BackendOption) (*valkeyBackend, error) { - // Default options +func NewValkeyBackend(client valkey.Client, opts ...BackendOption) (*valkeyBackend, error) { vopts := &Options{ Options: backend.ApplyOptions(), BlockTimeout: time.Second * 2, @@ -62,7 +60,7 @@ func NewValkeyBackend(client glide.Client, opts ...BackendOption) (*valkeyBacken } // Load all Lua scripts - scriptMapping := map[string]*options.Script{ + scriptMapping := map[string]**valkey.Lua{ "cancel_workflow_instance.lua": &cancelWorkflowInstanceScript, "complete_activity_task.lua": &completeActivityTaskScript, "complete_workflow_task.lua": &completeWorkflowTaskScript, @@ -80,21 +78,21 @@ func NewValkeyBackend(client glide.Client, opts ...BackendOption) (*valkeyBacken return vb, nil } -func loadScripts(scriptMapping map[string]*options.Script) error { +func loadScripts(scriptMapping map[string]**valkey.Lua) error { for scriptFile, 
scriptVar := range scriptMapping { - scriptContent, err := fs.ReadFile(luaScripts, "scripts/"+scriptFile) + scriptContent, err := fs.ReadFile(Luas, "scripts/"+scriptFile) if err != nil { return fmt.Errorf("reading Lua script %s: %w", scriptFile, err) } - *scriptVar = *options.NewScript(string(scriptContent)) + *scriptVar = valkey.NewLuaScript(string(scriptContent)) } return nil } type valkeyBackend struct { - client glide.Client + client valkey.Client options *Options keys *keys workflowQueue *taskQueue[workflowData] diff --git a/backend/valkey/workflow.go b/backend/valkey/workflow.go index 5293482f..7e9a18f9 100644 --- a/backend/valkey/workflow.go +++ b/backend/valkey/workflow.go @@ -14,7 +14,6 @@ import ( "github.com/cschleiden/go-workflows/internal/propagators" "github.com/cschleiden/go-workflows/internal/workflowerrors" "github.com/cschleiden/go-workflows/workflow" - "github.com/valkey-io/valkey-glide/go/v2/options" ) func (vb *valkeyBackend) PrepareWorkflowQueues(ctx context.Context, queues []workflow.Queue) error { @@ -42,20 +41,18 @@ func (vb *valkeyBackend) GetWorkflowTask(ctx context.Context, queues []workflow. } // Read all pending events for this instance - msgs, err := vb.client.XRange(ctx, vb.keys.pendingEventsKey(instanceState.Instance), "-", "+") + msgs, err := vb.client.Do(ctx, vb.client.B().Xrange().Key(vb.keys.pendingEventsKey(instanceState.Instance)).Start("-").End("+").Build()).AsXRange() if err != nil { return nil, fmt.Errorf("reading event stream: %w", err) } payloadKeys := make([]string, 0, len(msgs)) newEvents := make([]*history.Event, 0, len(msgs)) + lastMessageID := "" for _, msg := range msgs { - var eventStr string - for _, field := range msg.Fields { - if field.Field == "event" { - eventStr = field.Value - break - } + eventStr, ok := msg.FieldValues["event"] + if !ok || eventStr == "" { + continue } var event *history.Event @@ -65,17 +62,19 @@ func (vb *valkeyBackend) GetWorkflowTask(ctx context.Context, queues []workflow. payloadKeys = append(payloadKeys, event.ID) newEvents = append(newEvents, event) + lastMessageID = msg.ID } // Fetch event payloads if len(payloadKeys) > 0 { - res, err := vb.client.HMGet(ctx, vb.keys.payloadKey(instanceState.Instance), payloadKeys) + cmd := vb.client.B().Hmget().Key(vb.keys.payloadKey(instanceState.Instance)).Field(payloadKeys...) + res, err := vb.client.Do(ctx, cmd.Build()).AsStrSlice() if err != nil { return nil, fmt.Errorf("reading payloads: %w", err) } for i, event := range newEvents { - event.Attributes, err = history.DeserializeAttributes(event.Type, []byte(res[i].Value())) + event.Attributes, err = history.DeserializeAttributes(event.Type, []byte(res[i])) if err != nil { return nil, fmt.Errorf("deserializing attributes for event %v: %w", event.Type, err) } @@ -90,7 +89,7 @@ func (vb *valkeyBackend) GetWorkflowTask(ctx context.Context, queues []workflow. 
Metadata: instanceState.Metadata, LastSequenceID: instanceState.LastSequenceID, NewEvents: newEvents, - CustomData: msgs[len(msgs)-1].ID, + CustomData: lastMessageID, }, nil } @@ -284,10 +283,7 @@ func (vb *valkeyBackend) CompleteWorkflowTask( args = append(args, task.ID, vb.workflowQueue.groupName) // Run script - _, err := vb.client.InvokeScriptWithOptions(ctx, completeWorkflowTaskScript, options.ScriptOptions{ - Keys: keys, - Args: args, - }) + err := completeWorkflowTaskScript.Exec(ctx, vb.client, keys, args).Error() if err != nil { return fmt.Errorf("completing workflow task: %w", err) } diff --git a/go.mod b/go.mod index e541c2be..a7fc2a9b 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/jellydator/ttlcache/v3 v3.0.0 github.com/redis/go-redis/v9 v9.0.2 github.com/stretchr/testify v1.10.0 - github.com/valkey-io/valkey-glide/go/v2 v2.1.1 + github.com/valkey-io/valkey-go v1.0.68 go.opentelemetry.io/otel v1.31.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 @@ -25,7 +25,6 @@ require ( require ( github.com/dustin/go-humanize v1.0.1 // indirect - github.com/google/go-cmp v0.7.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect diff --git a/go.sum b/go.sum index 04201178..51fbfa4b 100644 --- a/go.sum +++ b/go.sum @@ -84,6 +84,8 @@ github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= +github.com/onsi/gomega v1.36.2 h1:koNYke6TVk6ZmnyHrCXba/T/MoLBXFjeC1PtvYgw0A8= +github.com/onsi/gomega v1.36.2/go.mod h1:DdwyADRjrc825LhMEkD76cHR5+pUnjhUN8GlHlRPHzY= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= @@ -107,8 +109,8 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/valkey-io/valkey-glide/go/v2 v2.1.1 h1:78eoWXIYLbse0ZpspKRMwbREj0+Tkoc/qkSR8H9iRsc= -github.com/valkey-io/valkey-glide/go/v2 v2.1.1/go.mod h1:LK5zmODJa5xnxZndarh1trntExb3GVGJXz4GwDCagho= +github.com/valkey-io/valkey-go v1.0.68 h1:bTbfonp49b41DqrF30q+y2JL3gcbjd2IiacFAtO4JBA= +github.com/valkey-io/valkey-go v1.0.68/go.mod h1:bHmwjIEOrGq/ubOJfh5uMRs7Xj6mV3mQ/ZXUbmqpjqY= go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 h1:K0XaT3DwHAcV4nKLzcQvwAgSyisUghWoY20I7huthMk= From f6696cde09af219d883b65a72e94f3aa98abed69 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:16 +0100 Subject: [PATCH 07/23] pass equal amount of ids as keys in xreadgroup --- backend/valkey/queue.go | 7 ++++++- 1 file 
changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go index 8bc4719f..070b22de 100644 --- a/backend/valkey/queue.go +++ b/backend/valkey/queue.go @@ -169,8 +169,13 @@ func (q *taskQueue[T]) Dequeue(ctx context.Context, client valkey.Client, queues streamKeys = append(streamKeys, keyInfo.StreamKey) } + ids := make([]string, len(streamKeys)) + for i := range ids { + ids[i] = ">" + } + // Try to dequeue from all given queues - cmd := client.B().Xreadgroup().Group(q.groupName, q.workerName).Streams().Key(streamKeys...).Id(">") + cmd := client.B().Xreadgroup().Group(q.groupName, q.workerName).Streams().Key(streamKeys...).Id(ids...) results, err := client.Do(ctx, cmd.Build()).AsXRead() if err != nil { // Check if error is due to no data available (nil response) From f6440ec4aecc001d727dba11a981755dc8d3604c Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:16 +0100 Subject: [PATCH 08/23] make keys cluster-safe (and keep compatibility with non-cluster-mode) --- backend/valkey/keys.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/valkey/keys.go b/backend/valkey/keys.go index 30abeb9f..7469ec38 100644 --- a/backend/valkey/keys.go +++ b/backend/valkey/keys.go @@ -21,7 +21,7 @@ func newKeys(prefix string) *keys { // activeInstanceExecutionKey returns the key for the latest execution of the given instance func (k *keys) activeInstanceExecutionKey(instanceID string) string { - return fmt.Sprintf("%sactive-instance-execution:%v", k.prefix, instanceID) + return fmt.Sprintf("%sactive-instance-execution:{%v}", k.prefix, instanceID) } func instanceSegment(instance *core.WorkflowInstance) string { @@ -33,41 +33,41 @@ func (k *keys) instanceKey(instance *core.WorkflowInstance) string { } func (k *keys) instanceKeyFromSegment(segment string) string { - return fmt.Sprintf("%sinstance:%v", k.prefix, segment) + return fmt.Sprintf("%sinstance:{%v}", k.prefix, segment) } // instancesByCreation returns the key for the ZSET that contains all instances sorted by creation date. The score is the // creation time as a unix timestamp. Used for listing all workflow instances in the diagnostics UI. func (k *keys) instancesByCreation() string { - return fmt.Sprintf("%sinstances-by-creation", k.prefix) + return fmt.Sprintf("%sinstances-by-creation:{global}", k.prefix) } // instancesActive returns the key for the SET that contains all active instances. Used for reporting active workflow // instances in stats. 
func (k *keys) instancesActive() string { - return fmt.Sprintf("%sinstances-active", k.prefix) + return fmt.Sprintf("%sinstances-active:{global}", k.prefix) } func (k *keys) instancesExpiring() string { - return fmt.Sprintf("%sinstances-expiring", k.prefix) + return fmt.Sprintf("%sinstances-expiring:{global}", k.prefix) } func (k *keys) pendingEventsKey(instance *core.WorkflowInstance) string { - return fmt.Sprintf("%spending-events:%v", k.prefix, instanceSegment(instance)) + return fmt.Sprintf("%spending-events:{%v}", k.prefix, instanceSegment(instance)) } func (k *keys) historyKey(instance *core.WorkflowInstance) string { - return fmt.Sprintf("%shistory:%v", k.prefix, instanceSegment(instance)) + return fmt.Sprintf("%shistory:{%v}", k.prefix, instanceSegment(instance)) } func (k *keys) futureEventsKey() string { - return fmt.Sprintf("%sfuture-events", k.prefix) + return fmt.Sprintf("%sfuture-events:{global}", k.prefix) } func (k *keys) futureEventKey(instance *core.WorkflowInstance, scheduleEventID int64) string { - return fmt.Sprintf("%sfuture-event:%v:%v", k.prefix, instanceSegment(instance), scheduleEventID) + return fmt.Sprintf("%sfuture-event:{%v}:%v", k.prefix, instanceSegment(instance), scheduleEventID) } func (k *keys) payloadKey(instance *core.WorkflowInstance) string { - return fmt.Sprintf("%spayload:%v", k.prefix, instanceSegment(instance)) + return fmt.Sprintf("%spayload:{%v}", k.prefix, instanceSegment(instance)) } From 94aeb80064cdf7c1c496125e201e5b7f06488c5e Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:16 +0100 Subject: [PATCH 09/23] Revert "make keys cluster-safe (and keep compatibility with non-cluster-mode)" This reverts commit 534b23fc301fa0142768036e30407a4c5f8c188a. --- backend/valkey/keys.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/valkey/keys.go b/backend/valkey/keys.go index 7469ec38..30abeb9f 100644 --- a/backend/valkey/keys.go +++ b/backend/valkey/keys.go @@ -21,7 +21,7 @@ func newKeys(prefix string) *keys { // activeInstanceExecutionKey returns the key for the latest execution of the given instance func (k *keys) activeInstanceExecutionKey(instanceID string) string { - return fmt.Sprintf("%sactive-instance-execution:{%v}", k.prefix, instanceID) + return fmt.Sprintf("%sactive-instance-execution:%v", k.prefix, instanceID) } func instanceSegment(instance *core.WorkflowInstance) string { @@ -33,41 +33,41 @@ func (k *keys) instanceKey(instance *core.WorkflowInstance) string { } func (k *keys) instanceKeyFromSegment(segment string) string { - return fmt.Sprintf("%sinstance:{%v}", k.prefix, segment) + return fmt.Sprintf("%sinstance:%v", k.prefix, segment) } // instancesByCreation returns the key for the ZSET that contains all instances sorted by creation date. The score is the // creation time as a unix timestamp. Used for listing all workflow instances in the diagnostics UI. func (k *keys) instancesByCreation() string { - return fmt.Sprintf("%sinstances-by-creation:{global}", k.prefix) + return fmt.Sprintf("%sinstances-by-creation", k.prefix) } // instancesActive returns the key for the SET that contains all active instances. Used for reporting active workflow // instances in stats. 
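
For readers following the hash-tag back-and-forth ([PATCH 08/23] adds {...} segments to the instance keys, [PATCH 09/23] reverts that, and [PATCH 19/23] later drops the task-queue hash tag in favor of optional key-prefix hashing): in Valkey Cluster only the substring inside {...} is hashed when picking a slot, so keys sharing that segment map to the same slot (and therefore the same node) and can be used together in one Lua script or multi-key stream read without CrossSlot errors. A hypothetical two-line illustration, where prefix and segment stand in for the backend's key prefix and instance segment:

    // Both keys hash only on the "{...}" substring, so they share a cluster slot.
    pendingKey := fmt.Sprintf("%spending-events:{%v}", prefix, segment)
    payloadKey := fmt.Sprintf("%spayload:{%v}", prefix, segment)
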
func (k *keys) instancesActive() string { - return fmt.Sprintf("%sinstances-active:{global}", k.prefix) + return fmt.Sprintf("%sinstances-active", k.prefix) } func (k *keys) instancesExpiring() string { - return fmt.Sprintf("%sinstances-expiring:{global}", k.prefix) + return fmt.Sprintf("%sinstances-expiring", k.prefix) } func (k *keys) pendingEventsKey(instance *core.WorkflowInstance) string { - return fmt.Sprintf("%spending-events:{%v}", k.prefix, instanceSegment(instance)) + return fmt.Sprintf("%spending-events:%v", k.prefix, instanceSegment(instance)) } func (k *keys) historyKey(instance *core.WorkflowInstance) string { - return fmt.Sprintf("%shistory:{%v}", k.prefix, instanceSegment(instance)) + return fmt.Sprintf("%shistory:%v", k.prefix, instanceSegment(instance)) } func (k *keys) futureEventsKey() string { - return fmt.Sprintf("%sfuture-events:{global}", k.prefix) + return fmt.Sprintf("%sfuture-events", k.prefix) } func (k *keys) futureEventKey(instance *core.WorkflowInstance, scheduleEventID int64) string { - return fmt.Sprintf("%sfuture-event:{%v}:%v", k.prefix, instanceSegment(instance), scheduleEventID) + return fmt.Sprintf("%sfuture-event:%v:%v", k.prefix, instanceSegment(instance), scheduleEventID) } func (k *keys) payloadKey(instance *core.WorkflowInstance) string { - return fmt.Sprintf("%spayload:{%v}", k.prefix, instanceSegment(instance)) + return fmt.Sprintf("%spayload:%v", k.prefix, instanceSegment(instance)) } From 6ad766078d8d45a9e19b07eb038d49619e4ee6c9 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:16 +0100 Subject: [PATCH 10/23] dont return completing workflow when removing canceled timers --- backend/valkey/workflow.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/backend/valkey/workflow.go b/backend/valkey/workflow.go index 7e9a18f9..ff66d338 100644 --- a/backend/valkey/workflow.go +++ b/backend/valkey/workflow.go @@ -157,11 +157,8 @@ func (vb *valkeyBackend) CompleteWorkflowTask( // Remove canceled timers timersToCancel := make([]*history.Event, 0) for _, event := range executedEvents { - switch event.Type { - case history.EventType_TimerCanceled: + if event.Type == history.EventType_TimerCanceled { timersToCancel = append(timersToCancel, event) - default: - return fmt.Errorf("unexpected event type %v", event.Type) } } From 549d44b9a0fb7930d24afecac3726debc83cb93a Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:17 +0100 Subject: [PATCH 11/23] add valkey nil check to readInstance --- backend/valkey/instance.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/valkey/instance.go b/backend/valkey/instance.go index c0846f9f..422809b0 100644 --- a/backend/valkey/instance.go +++ b/backend/valkey/instance.go @@ -198,13 +198,12 @@ type instanceState struct { func readInstance(ctx context.Context, client valkey.Client, instanceKey string) (*instanceState, error) { val, err := client.Do(ctx, client.B().Get().Key(instanceKey).Build()).ToString() if err != nil { + if valkey.IsValkeyNil(err) { + return nil, backend.ErrInstanceNotFound + } return nil, fmt.Errorf("reading instance: %w", err) } - if val == "" { - return nil, backend.ErrInstanceNotFound - } - var state instanceState if err := json.Unmarshal([]byte(val), &state); err != nil { return nil, fmt.Errorf("unmarshaling instance state: %w", err) From 611f099721eb60662be88a88de17c5cd19c5e35a Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:17 +0100 Subject: [PATCH 12/23] add block to 
xreadgroup in dequeing, and add some debug in the error when no instance is found in the workflow --- backend/valkey/queue.go | 2 +- backend/valkey/workflow.go | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go index 070b22de..eb4d4b2a 100644 --- a/backend/valkey/queue.go +++ b/backend/valkey/queue.go @@ -175,7 +175,7 @@ func (q *taskQueue[T]) Dequeue(ctx context.Context, client valkey.Client, queues } // Try to dequeue from all given queues - cmd := client.B().Xreadgroup().Group(q.groupName, q.workerName).Streams().Key(streamKeys...).Id(ids...) + cmd := client.B().Xreadgroup().Group(q.groupName, q.workerName).Block(timeout.Milliseconds()).Streams().Key(streamKeys...).Id(ids...) results, err := client.Do(ctx, cmd.Build()).AsXRead() if err != nil { // Check if error is due to no data available (nil response) diff --git a/backend/valkey/workflow.go b/backend/valkey/workflow.go index ff66d338..6008a84b 100644 --- a/backend/valkey/workflow.go +++ b/backend/valkey/workflow.go @@ -35,13 +35,13 @@ func (vb *valkeyBackend) GetWorkflowTask(ctx context.Context, queues []workflow. return nil, nil } - instanceState, err := readInstance(ctx, vb.client, vb.keys.instanceKeyFromSegment(instanceTask.ID)) + state, err := readInstance(ctx, vb.client, vb.keys.instanceKeyFromSegment(instanceTask.ID)) if err != nil { - return nil, fmt.Errorf("reading workflow instance: %w", err) + return nil, fmt.Errorf("reading workflow instance with ID %s: %w", instanceTask.ID, err) } // Read all pending events for this instance - msgs, err := vb.client.Do(ctx, vb.client.B().Xrange().Key(vb.keys.pendingEventsKey(instanceState.Instance)).Start("-").End("+").Build()).AsXRange() + msgs, err := vb.client.Do(ctx, vb.client.B().Xrange().Key(vb.keys.pendingEventsKey(state.Instance)).Start("-").End("+").Build()).AsXRange() if err != nil { return nil, fmt.Errorf("reading event stream: %w", err) } @@ -67,7 +67,7 @@ func (vb *valkeyBackend) GetWorkflowTask(ctx context.Context, queues []workflow. // Fetch event payloads if len(payloadKeys) > 0 { - cmd := vb.client.B().Hmget().Key(vb.keys.payloadKey(instanceState.Instance)).Field(payloadKeys...) + cmd := vb.client.B().Hmget().Key(vb.keys.payloadKey(state.Instance)).Field(payloadKeys...) res, err := vb.client.Do(ctx, cmd.Build()).AsStrSlice() if err != nil { return nil, fmt.Errorf("reading payloads: %w", err) @@ -83,11 +83,11 @@ func (vb *valkeyBackend) GetWorkflowTask(ctx context.Context, queues []workflow. 
return &backend.WorkflowTask{ ID: instanceTask.TaskID, - Queue: core.Queue(instanceState.Queue), - WorkflowInstance: instanceState.Instance, - WorkflowInstanceState: instanceState.State, - Metadata: instanceState.Metadata, - LastSequenceID: instanceState.LastSequenceID, + Queue: core.Queue(state.Queue), + WorkflowInstance: state.Instance, + WorkflowInstanceState: state.State, + Metadata: state.Metadata, + LastSequenceID: state.LastSequenceID, NewEvents: newEvents, CustomData: lastMessageID, }, nil From fb9cb3d8ff43590b6cf530f572842a9a0709ce07 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:17 +0100 Subject: [PATCH 13/23] return internal errors in diag endpoints --- diag/diag.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/diag/diag.go b/diag/diag.go index e5f36bd8..cd296bd9 100644 --- a/diag/diag.go +++ b/diag/diag.go @@ -34,12 +34,14 @@ func NewServeMux(backend Backend) *http.ServeMux { stats, err := backend.GetStats(r.Context()) if err != nil { w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(err.Error())) return } w.Header().Add("Content-Type", "application/json") if err := json.NewEncoder(w).Encode(stats); err != nil { w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(err.Error())) return } @@ -72,12 +74,14 @@ func NewServeMux(backend Backend) *http.ServeMux { instances, err := backend.GetWorkflowInstances(r.Context(), afterInstanceID, afterExecutionID, count) if err != nil { w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(err.Error())) return } w.Header().Add("Content-Type", "application/json") if err := json.NewEncoder(w).Encode(instances); err != nil { w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(err.Error())) return } @@ -106,6 +110,7 @@ func NewServeMux(backend Backend) *http.ServeMux { history, err := backend.GetWorkflowInstanceHistory(r.Context(), instanceRef.Instance, nil) if err != nil { w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(err.Error())) return } @@ -130,6 +135,7 @@ func NewServeMux(backend Backend) *http.ServeMux { w.Header().Add("Content-Type", "application/json") if err := json.NewEncoder(w).Encode(result); err != nil { w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(err.Error())) return } @@ -160,6 +166,7 @@ func NewServeMux(backend Backend) *http.ServeMux { w.Header().Add("Content-Type", "application/json") if err := json.NewEncoder(w).Encode(tree); err != nil { w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(err.Error())) return } From f6519b40983dc50b4009d009798073c85a53ee80 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:17 +0100 Subject: [PATCH 14/23] modify GetWorkflowInstances to use BYSCORE limitting --- backend/valkey/diagnostics.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/valkey/diagnostics.go b/backend/valkey/diagnostics.go index c9a1dc57..c69f046f 100644 --- a/backend/valkey/diagnostics.go +++ b/backend/valkey/diagnostics.go @@ -13,7 +13,7 @@ import ( var _ diag.Backend = (*valkeyBackend)(nil) func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstanceID, afterExecutionID string, count int) ([]*diag.WorkflowInstanceRef, error) { - zrangeCmd := vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("0").Max("-1").Rev().Limit(0, int64(count)) + zrangeCmd := vb.client.B().Zrangebyscore().Key(vb.keys.instancesByCreation()).Min("0").Max("-1").Limit(0, int64(count)) if 
afterInstanceID != "" { afterSegmentID := instanceSegment(core.NewWorkflowInstance(afterInstanceID, afterExecutionID)) @@ -30,7 +30,7 @@ func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstance return nil, nil } - zrangeCmd = vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("-inf").Max(fmt.Sprintf("(%f", scores)).Rev().Limit(0, int64(count)) + zrangeCmd = vb.client.B().Zrangebyscore().Key(vb.keys.instancesByCreation()).Min("-inf").Max(fmt.Sprintf("(%f", scores)).Limit(0, int64(count)) } instanceSegments, err := vb.client.Do(ctx, zrangeCmd.Build()).AsStrSlice() From bd6ead115eb35e73ed2bff16a97aa877ba7440ba Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:17 +0100 Subject: [PATCH 15/23] simplify dequeue --- backend/valkey/queue.go | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go index eb4d4b2a..892c5675 100644 --- a/backend/valkey/queue.go +++ b/backend/valkey/queue.go @@ -177,26 +177,20 @@ func (q *taskQueue[T]) Dequeue(ctx context.Context, client valkey.Client, queues // Try to dequeue from all given queues cmd := client.B().Xreadgroup().Group(q.groupName, q.workerName).Block(timeout.Milliseconds()).Streams().Key(streamKeys...).Id(ids...) results, err := client.Do(ctx, cmd.Build()).AsXRead() - if err != nil { - // Check if error is due to no data available (nil response) - if valkey.IsValkeyNil(err) { - return nil, nil - } - return nil, fmt.Errorf("error dequeueing task: %w", err) + if err != nil && !valkey.IsValkeyNil(err) { + return nil, fmt.Errorf("dequeueing task: %w", err) } - if len(results) == 0 { - return nil, nil + var msgs []valkey.XRangeEntry + for _, streamResult := range results { + msgs = append(msgs, streamResult...) 
} - // Get the first entry from the first stream - for _, streamResult := range results { - if len(streamResult) > 0 { - return msgToTaskItem[T](streamResult[0]) - } + if len(results) == 0 || len(msgs) == 0 || valkey.IsValkeyNil(err) { + return nil, nil } - return nil, nil + return msgToTaskItem[T](msgs[0]) } func (q *taskQueue[T]) Extend(ctx context.Context, client valkey.Client, queue workflow.Queue, taskID string) error { From e0586d74259e38441c2358f5ac97f561473085fa Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:17 +0100 Subject: [PATCH 16/23] reverse zrange scoring of getworkflow instances --- backend/valkey/diagnostics.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/backend/valkey/diagnostics.go b/backend/valkey/diagnostics.go index c69f046f..e7db4419 100644 --- a/backend/valkey/diagnostics.go +++ b/backend/valkey/diagnostics.go @@ -13,8 +13,7 @@ import ( var _ diag.Backend = (*valkeyBackend)(nil) func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstanceID, afterExecutionID string, count int) ([]*diag.WorkflowInstanceRef, error) { - zrangeCmd := vb.client.B().Zrangebyscore().Key(vb.keys.instancesByCreation()).Min("0").Max("-1").Limit(0, int64(count)) - + zrangeCmd := vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("0").Max("-inf").Byscore().Rev().Limit(0, int64(count)) if afterInstanceID != "" { afterSegmentID := instanceSegment(core.NewWorkflowInstance(afterInstanceID, afterExecutionID)) scores, err := vb.client.Do(ctx, vb.client.B().Zscore().Key(vb.keys.instancesByCreation()).Member(afterSegmentID).Build()).AsFloat64() @@ -30,7 +29,7 @@ func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstance return nil, nil } - zrangeCmd = vb.client.B().Zrangebyscore().Key(vb.keys.instancesByCreation()).Min("-inf").Max(fmt.Sprintf("(%f", scores)).Limit(0, int64(count)) + zrangeCmd = vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("0").Max(fmt.Sprintf("(%f", scores)).Byscore().Rev().Limit(0, int64(count)) } instanceSegments, err := vb.client.Do(ctx, zrangeCmd.Build()).AsStrSlice() From 167baa5c880c8ff1789857be4425442cc64c3ad2 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:17 +0100 Subject: [PATCH 17/23] use zmscore, and also use proper min values for zrange --- backend/valkey/diagnostics.go | 6 +- backend/valkey/diagnostics_test.go | 117 +++++++++++++++++++++++++++++ backend/valkey/queue_test.go | 9 +-- 3 files changed, 121 insertions(+), 11 deletions(-) create mode 100644 backend/valkey/diagnostics_test.go diff --git a/backend/valkey/diagnostics.go b/backend/valkey/diagnostics.go index e7db4419..c561a976 100644 --- a/backend/valkey/diagnostics.go +++ b/backend/valkey/diagnostics.go @@ -13,10 +13,10 @@ import ( var _ diag.Backend = (*valkeyBackend)(nil) func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstanceID, afterExecutionID string, count int) ([]*diag.WorkflowInstanceRef, error) { - zrangeCmd := vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("0").Max("-inf").Byscore().Rev().Limit(0, int64(count)) + zrangeCmd := vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("+inf").Max("-inf").Byscore().Rev().Limit(0, int64(count)) if afterInstanceID != "" { afterSegmentID := instanceSegment(core.NewWorkflowInstance(afterInstanceID, afterExecutionID)) - scores, err := vb.client.Do(ctx, vb.client.B().Zscore().Key(vb.keys.instancesByCreation()).Member(afterSegmentID).Build()).AsFloat64() + scores, err 
:= vb.client.Do(ctx, vb.client.B().Zmscore().Key(vb.keys.instancesByCreation()).Member(afterSegmentID).Build()).AsFloat64() if err != nil { return nil, fmt.Errorf("getting instance score for %v: %w", afterSegmentID, err) } @@ -29,7 +29,7 @@ func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstance return nil, nil } - zrangeCmd = vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("0").Max(fmt.Sprintf("(%f", scores)).Byscore().Rev().Limit(0, int64(count)) + zrangeCmd = vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("+inf").Max(fmt.Sprintf("(%f", scores)).Byscore().Rev().Limit(0, int64(count)) } instanceSegments, err := vb.client.Do(ctx, zrangeCmd.Build()).AsStrSlice() diff --git a/backend/valkey/diagnostics_test.go b/backend/valkey/diagnostics_test.go new file mode 100644 index 00000000..998bfba6 --- /dev/null +++ b/backend/valkey/diagnostics_test.go @@ -0,0 +1,117 @@ +package valkey + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "testing" + "time" + + "github.com/cschleiden/go-workflows/backend" + "github.com/cschleiden/go-workflows/backend/history" + "github.com/cschleiden/go-workflows/backend/test" + "github.com/cschleiden/go-workflows/client" + "github.com/cschleiden/go-workflows/diag" + "github.com/stretchr/testify/require" + "github.com/valkey-io/valkey-go" +) + +func getClient() valkey.Client { + newClient, _ := valkey.NewClient(valkey.ClientOption{ + InitAddress: []string{"localhost:6379"}, + Password: "ValkeyPassw0rd", + SelectDB: 0, + }) + return newClient +} + +func getCreateBackend(client valkey.Client, additionalOptions ...BackendOption) func(options ...backend.BackendOption) test.TestBackend { + return func(options ...backend.BackendOption) test.TestBackend { + // Flush database + if err := client.Do(context.Background(), client.B().Flushdb().Build()).Error(); err != nil { + panic(err) + } + + r, err := client.Do(context.Background(), client.B().Keys().Pattern("*").Build()).AsStrSlice() + if err != nil { + panic(err) + } + + if len(r) > 0 { + panic("Keys should've been empty" + strings.Join(r, ", ")) + } + + redisOptions := []BackendOption{ + WithBlockTimeout(time.Millisecond * 10), + WithBackendOptions(options...), + } + + redisOptions = append(redisOptions, additionalOptions...) + + b, err := NewValkeyBackend(client, redisOptions...) 
+ if err != nil { + panic(err) + } + + return b + } +} + +var _ test.TestBackend = (*valkeyBackend)(nil) + +// GetFutureEvents +func (vb *valkeyBackend) GetFutureEvents(ctx context.Context) ([]*history.Event, error) { + r, err := vb.client.Do(ctx, vb.client.B().Zrangebyscore().Key(vb.keys.futureEventsKey()).Min("-inf").Max("+inf").Build()).AsStrSlice() + if err != nil { + return nil, fmt.Errorf("getting future events: %w", err) + } + + events := make([]*history.Event, 0) + + for _, eventID := range r { + eventStr, err := vb.client.Do(ctx, vb.client.B().Hget().Key(eventID).Field("event").Build()).AsBytes() + if err != nil { + return nil, fmt.Errorf("getting event %v: %w", eventID, err) + } + + var event *history.Event + if err := json.Unmarshal(eventStr, &event); err != nil { + return nil, fmt.Errorf("unmarshaling event %v: %w", eventID, err) + } + + events = append(events, event) + } + + return events, nil +} + +func Test_Diag_GetWorkflowInstances(t *testing.T) { + if testing.Short() { + t.Skip() + } + + c := getClient() + t.Cleanup(func() { c.Close() }) + + vc := getCreateBackend(c)() + + bd := vc.(diag.Backend) + + ctx := context.Background() + instances, err := bd.GetWorkflowInstances(ctx, "", "", 5) + require.NoError(t, err) + require.Empty(t, instances) + + cl := client.New(bd) + + _, err = cl.CreateWorkflowInstance(ctx, client.WorkflowInstanceOptions{ + InstanceID: "ex1", + }, "some-workflow") + require.NoError(t, err) + + instances, err = bd.GetWorkflowInstances(ctx, "", "", 5) + require.NoError(t, err) + require.Len(t, instances, 1) + require.Equal(t, "ex1", instances[0].Instance.InstanceID) +} diff --git a/backend/valkey/queue_test.go b/backend/valkey/queue_test.go index 156c12a7..8f6c329d 100644 --- a/backend/valkey/queue_test.go +++ b/backend/valkey/queue_test.go @@ -8,7 +8,6 @@ import ( "github.com/cschleiden/go-workflows/core" "github.com/cschleiden/go-workflows/workflow" "github.com/stretchr/testify/assert" - "github.com/valkey-io/valkey-go" ) func Test_TaskQueue(t *testing.T) { @@ -20,13 +19,7 @@ func Test_TaskQueue(t *testing.T) { taskType := "taskType" - client, err := valkey.NewClient(valkey.ClientOption{ - InitAddress: []string{"localhost:6379"}, - Password: "ValkeyPassw0rd", - SelectDB: 0, - }) - assert.NoError(t, err) - t.Cleanup(func() { client.Close() }) + client := getClient() lockTimeout := time.Millisecond * 10 blockTimeout := time.Millisecond * 10 From f45557d8d2f4bde59a9f80a49041ba29319946a9 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:17 +0100 Subject: [PATCH 18/23] use prepare lua script instead of sdk directly --- backend/valkey/queue.go | 32 ++++++++++++++++++++++++-------- backend/valkey/queue_test.go | 7 ++++++- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go index 892c5675..658518e5 100644 --- a/backend/valkey/queue.go +++ b/backend/valkey/queue.go @@ -13,6 +13,14 @@ import ( "github.com/valkey-io/valkey-go" ) +var ( + prepareCmd *valkey.Lua + //enqueueCmd *valkey.Lua + //completeCmd *valkey.Lua + //recoverCmd *valkey.Lua + //sizeCmd *valkey.Lua +) + type taskQueue[T any] struct { keyPrefix string tasktype string @@ -70,19 +78,27 @@ func newTaskQueue[T any](keyPrefix, tasktype, workerName string) (*taskQueue[T], hashTag: hashTag, } + // Load all Lua scripts + cmdMapping := map[string]**valkey.Lua{ + "queue/prepare.lua": &prepareCmd, + } + + if err := loadScripts(cmdMapping); err != nil { + return nil, fmt.Errorf("loading Lua scripts: %w", err) + } + return tq, 
nil } func (q *taskQueue[T]) Prepare(ctx context.Context, client valkey.Client, queues []workflow.Queue) error { + var queueStreamKeys []string for _, queue := range queues { - streamKey := q.Keys(queue).StreamKey - err := client.Do(ctx, client.B().XgroupCreate().Key(streamKey).Group(q.groupName).Id("0").Mkstream().Build()).Error() - if err != nil { - // Group might already exist, which is fine, consider prepare successful - if !strings.Contains(err.Error(), "BUSYGROUP") { - return fmt.Errorf("preparing queue %s: %w", queue, err) - } - } + queueStreamKeys = append(queueStreamKeys, q.Keys(queue).StreamKey) + } + + err := prepareCmd.Exec(ctx, client, queueStreamKeys, []string{q.groupName}).Error() + if err != nil && !valkey.IsValkeyNil(err) { + return fmt.Errorf("preparing queues: %w", err) } return nil diff --git a/backend/valkey/queue_test.go b/backend/valkey/queue_test.go index 8f6c329d..5b396668 100644 --- a/backend/valkey/queue_test.go +++ b/backend/valkey/queue_test.go @@ -47,10 +47,15 @@ func Test_TaskQueue(t *testing.T) { ctx := context.Background() assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t1", nil)) + assert.NoError(t, q.Enqueue(ctx, client, workflow.QueueDefault, "t2", nil)) + assert.NoError(t, q.Enqueue(ctx, client, "OtherQueue", "t3", nil)) s1, err := q.Size(ctx, client) assert.NoError(t, err) - assert.Equal(t, map[workflow.Queue]int64{workflow.QueueDefault: 1}, s1) + assert.Equal(t, map[workflow.Queue]int64{ + workflow.QueueDefault: 2, + "OtherQueue": 1, + }, s1) }, }, { From ac2060962a8512b5e1fd77a6e2ff2fcdb0079313 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:17 +0100 Subject: [PATCH 19/23] remove hashtag (in favor of optional keyprefix hashing) --- backend/valkey/queue.go | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go index 658518e5..9cce1f2b 100644 --- a/backend/valkey/queue.go +++ b/backend/valkey/queue.go @@ -27,11 +27,6 @@ type taskQueue[T any] struct { groupName string workerName string queueSetKey string - // hashTag is a Valkey Cluster hash tag ensuring all keys used together - // (across different queues for the same task type) map to the same slot. - // This avoids CrossSlot errors when Valkey is running in clustered/serverless modes - // and XREADGROUP is called on multiple stream keys. - hashTag string } type TaskItem[T any] struct { @@ -60,22 +55,12 @@ func newTaskQueue[T any](keyPrefix, tasktype, workerName string) (*taskQueue[T], workerName = uuid.NewString() } - // Use a stable Valkey Cluster hash tag so that all keys for this task type - // hash to the same slot regardless of the specific queue name. Only the - // substring within {...} is used for hashing. 
- // Example generated keys: - // {task:}:task-stream: - // {task:}:task-set: - // {task:}::queues - hashTag := fmt.Sprintf("{task:%s}", tasktype) - tq := &taskQueue[T]{ keyPrefix: keyPrefix, tasktype: tasktype, groupName: "task-workers", workerName: workerName, - queueSetKey: fmt.Sprintf("%s%s:%s:queues", keyPrefix, hashTag, tasktype), - hashTag: hashTag, + queueSetKey: fmt.Sprintf("%s%s:queues", keyPrefix, tasktype), } // Load all Lua scripts @@ -106,8 +91,8 @@ func (q *taskQueue[T]) Prepare(ctx context.Context, client valkey.Client, queues func (q *taskQueue[T]) Keys(queue workflow.Queue) KeyInfo { return KeyInfo{ - StreamKey: fmt.Sprintf("%s%s:task-stream:%s", q.keyPrefix, q.hashTag, queue), - SetKey: fmt.Sprintf("%s%s:task-set:%s", q.keyPrefix, q.hashTag, queue), + StreamKey: fmt.Sprintf("%stask-stream:%s:%s", q.keyPrefix, queue, q.tasktype), + SetKey: fmt.Sprintf("%stask-set:%s:%s", q.keyPrefix, queue, q.tasktype), } } From 58b37bf471a2f400ef00ffa7730809b71b930321 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 15:40:18 +0100 Subject: [PATCH 20/23] bring back lua scripts for queues --- backend/valkey/queue.go | 166 +++++++++++++++++----------------------- 1 file changed, 70 insertions(+), 96 deletions(-) diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go index 9cce1f2b..2e6c45d8 100644 --- a/backend/valkey/queue.go +++ b/backend/valkey/queue.go @@ -14,11 +14,11 @@ import ( ) var ( - prepareCmd *valkey.Lua - //enqueueCmd *valkey.Lua - //completeCmd *valkey.Lua - //recoverCmd *valkey.Lua - //sizeCmd *valkey.Lua + prepareCmd *valkey.Lua + enqueueCmd *valkey.Lua + completeCmd *valkey.Lua + recoverCmd *valkey.Lua + sizeCmd *valkey.Lua ) type taskQueue[T any] struct { @@ -65,7 +65,11 @@ func newTaskQueue[T any](keyPrefix, tasktype, workerName string) (*taskQueue[T], // Load all Lua scripts cmdMapping := map[string]**valkey.Lua{ - "queue/prepare.lua": &prepareCmd, + "queue/prepare.lua": &prepareCmd, + "queue/size.lua": &sizeCmd, + "queue/recover.lua": &recoverCmd, + "queue/enqueue.lua": &enqueueCmd, + "queue/complete.lua": &completeCmd, } if err := loadScripts(cmdMapping); err != nil { @@ -97,24 +101,27 @@ func (q *taskQueue[T]) Keys(queue workflow.Queue) KeyInfo { } func (q *taskQueue[T]) Size(ctx context.Context, client valkey.Client) (map[workflow.Queue]int64, error) { - members, err := client.Do(ctx, client.B().Smembers().Key(q.queueSetKey).Build()).AsStrSlice() + sizeData, err := sizeCmd.Exec(ctx, client, []string{q.queueSetKey}, []string{}).ToArray() if err != nil { return nil, fmt.Errorf("getting queue size: %w", err) } res := map[workflow.Queue]int64{} - for _, queueSetKey := range members { - size, err := client.Do(ctx, client.B().Scard().Key(queueSetKey).Build()).AsInt64() + for i := 0; i < len(sizeData); i += 2 { + queueName, err := sizeData[i].ToString() if err != nil { - return nil, fmt.Errorf("getting queue size: %w", err) + return nil, fmt.Errorf("parsing queue name: %w", err) } - trimmed := strings.TrimPrefix(queueSetKey, q.keyPrefix) - lastIdx := strings.LastIndex(trimmed, ":") - if lastIdx == -1 || lastIdx == len(trimmed)-1 { - return nil, fmt.Errorf("unexpected set key format: %s", queueSetKey) + queueName = strings.TrimPrefix(queueName, q.keyPrefix) + queueName = strings.Split(queueName, ":")[1] // queue name is the third part of the key (0-indexed) + + queue := workflow.Queue(queueName) + size, err := sizeData[i+1].AsInt64() + if err != nil { + return nil, fmt.Errorf("parsing queue size: %w", err) } - queue := 
workflow.Queue(trimmed[lastIdx+1:]) + res[queue] = size } @@ -129,24 +136,8 @@ func (q *taskQueue[T]) Enqueue(ctx context.Context, client valkey.Client, queue keys := q.Keys(queue) - // Add to set to track uniqueness - err = client.Do(ctx, client.B().Sadd().Key(q.queueSetKey).Member(keys.SetKey).Build()).Error() - if err != nil { - return err - } - - // Add to set for this queue - added, err := client.Do(ctx, client.B().Sadd().Key(keys.SetKey).Member(id).Build()).AsInt64() - if err != nil { - return err - } - - // Only add to stream if it's a new task - if added > 0 { - err = client.Do(ctx, client.B().Xadd().Key(keys.StreamKey).Id("*").FieldValue().FieldValue("id", id).FieldValue("data", string(ds)).Build()).Error() - if err != nil { - return err - } + if err := enqueueCmd.Exec(ctx, client, []string{q.queueSetKey, keys.SetKey, keys.StreamKey}, []string{q.groupName, id, string(ds)}).Error(); err != nil { + return fmt.Errorf("enqueueing task: %w", err) } return nil @@ -209,84 +200,67 @@ func (q *taskQueue[T]) Extend(ctx context.Context, client valkey.Client, queue w } func (q *taskQueue[T]) Complete(ctx context.Context, client valkey.Client, queue workflow.Queue, taskID string) error { - keyInfo := q.Keys(queue) - - // Get the task to find the ID - msgs, err := client.Do(ctx, client.B().Xrange().Key(keyInfo.StreamKey).Start(taskID).End(taskID).Build()).AsXRange() - if err != nil { - // Check if error is due to no data available (nil response) - if valkey.IsValkeyNil(err) { - return nil - } + err := completeCmd.Exec(ctx, client, []string{ + q.Keys(queue).SetKey, + q.Keys(queue).StreamKey, + }, []string{taskID, q.groupName}).Error() + if err != nil && !valkey.IsValkeyNil(err) { return fmt.Errorf("completing task: %w", err) } - if len(msgs) == 0 { - return nil - } - - msg := msgs[0] - id, ok := msg.FieldValues["id"] - if !ok { - return fmt.Errorf("completing task: missing id field") - } + return nil +} - // Remove from set - err = client.Do(ctx, client.B().Srem().Key(keyInfo.SetKey).Member(id).Build()).Error() - if err != nil { - return fmt.Errorf("completing task: %w", err) +func (q *taskQueue[T]) recover(ctx context.Context, client valkey.Client, queues []workflow.Queue, idleTimeout time.Duration) (*TaskItem[T], error) { + var keys []string + for _, queue := range queues { + keys = append(keys, q.Keys(queue).StreamKey) } - // Acknowledge in consumer group - err = client.Do(ctx, client.B().Xack().Key(keyInfo.StreamKey).Group(q.groupName).Id(taskID).Build()).Error() + r, err := recoverCmd.Exec(ctx, client, keys, []string{q.groupName, q.workerName, strconv.FormatInt(idleTimeout.Milliseconds(), 10), "0"}).ToArray() if err != nil { - return fmt.Errorf("completing task: %w", err) - } + if valkey.IsValkeyNil(err) { + return nil, nil + } - // Delete from stream - err = client.Do(ctx, client.B().Xdel().Key(keyInfo.StreamKey).Id(taskID).Build()).Error() - if err != nil { - return fmt.Errorf("completing task: %w", err) + return nil, fmt.Errorf("recovering abandoned task: %w", err) } - return nil -} - -func (q *taskQueue[T]) recover(ctx context.Context, client valkey.Client, queues []workflow.Queue, idleTimeout time.Duration) (*TaskItem[T], error) { - for _, queue := range queues { - streamKey := q.Keys(queue).StreamKey - - // Try to recover abandoned tasks - cmd := client.B().Xautoclaim().Key(streamKey).Group(q.groupName).Consumer(q.workerName).MinIdleTime(strconv.FormatInt(idleTimeout.Milliseconds(), 10)).Start("0").Count(1) - msgs, err := client.Do(ctx, cmd.Build()).ToArray() + if len(r) > 1 { + msgs, 
err := r[1].ToArray() if err != nil { - // Check if error is due to no data available (nil response) - if valkey.IsValkeyNil(err) { - continue - } return nil, fmt.Errorf("recovering abandoned task: %w", err) } - - if len(msgs) >= 2 { - entries, _ := msgs[1].ToArray() - for _, entry := range entries { - arr, _ := entry.ToArray() - if len(arr) == 2 { - id, _ := arr[0].ToString() - fieldsArr, _ := arr[1].ToArray() - fieldValues := map[string]string{} - for i := 0; i+1 < len(fieldsArr); i += 2 { - key, _ := fieldsArr[i].ToString() - val, _ := fieldsArr[i+1].ToString() - fieldValues[key] = val - } - xEntry := valkey.XRangeEntry{ - ID: id, - FieldValues: fieldValues, - } - return msgToTaskItem[T](xEntry) + if len(msgs) > 0 && !msgs[0].IsNil() { + msgData, err := msgs[0].ToArray() + if err != nil { + return nil, fmt.Errorf("recovering abandoned task: %w", err) + } + id, err := msgData[0].ToString() + if err != nil { + return nil, fmt.Errorf("recovering abandoned task: %w", err) + } + rawValues, err := msgData[1].ToArray() + if err != nil { + return nil, fmt.Errorf("recovering abandoned task: %w", err) + } + values := make(map[string]string) + for i := 0; i < len(rawValues); i += 2 { + key, err := rawValues[i].ToString() + if err != nil { + return nil, fmt.Errorf("recovering abandoned task: %w", err) + } + value, err := rawValues[i+1].ToString() + if err != nil { + return nil, fmt.Errorf("recovering abandoned task: %w", err) } + values[key] = value } + + return msgToTaskItem[T](valkey.XRangeEntry{ + ID: id, + FieldValues: values, + }) } } From f4217bf03869071474086fd54663515cb9a1a391 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 16:03:03 +0100 Subject: [PATCH 21/23] revert some minor changes to redis impl. that are unnecessary --- backend/redis/redis.go | 4 ++-- backend/valkey/activity.go | 1 - backend/valkey/valkey.go | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/backend/redis/redis.go b/backend/redis/redis.go index dabda50f..379f4194 100644 --- a/backend/redis/redis.go +++ b/backend/redis/redis.go @@ -19,7 +19,7 @@ import ( var _ backend.Backend = (*redisBackend)(nil) //go:embed scripts -var Luas embed.FS +var luaScripts embed.FS var ( createWorkflowInstanceCmd *redis.Script @@ -85,7 +85,7 @@ func NewRedisBackend(client redis.UniversalClient, opts ...RedisBackendOption) ( func loadScripts(ctx context.Context, rdb redis.UniversalClient, cmdMapping map[string]**redis.Script) error { for scriptFile, cmd := range cmdMapping { - scriptContent, err := fs.ReadFile(Luas, "scripts/"+scriptFile) + scriptContent, err := fs.ReadFile(luaScripts, "scripts/"+scriptFile) if err != nil { return fmt.Errorf("reading Lua script %s: %w", scriptFile, err) } diff --git a/backend/valkey/activity.go b/backend/valkey/activity.go index 33a9c2b1..3e740f18 100644 --- a/backend/valkey/activity.go +++ b/backend/valkey/activity.go @@ -46,7 +46,6 @@ func (vb *valkeyBackend) CompleteActivityTask(ctx context.Context, task *backend return err } - // Marshal event data eventData, payload, err := marshalEvent(result) if err != nil { return err diff --git a/backend/valkey/valkey.go b/backend/valkey/valkey.go index 9eeefce3..e624c668 100644 --- a/backend/valkey/valkey.go +++ b/backend/valkey/valkey.go @@ -18,7 +18,7 @@ import ( var _ backend.Backend = (*valkeyBackend)(nil) //go:embed scripts -var Luas embed.FS +var luaScripts embed.FS var ( createWorkflowInstanceScript *valkey.Lua @@ -80,7 +80,7 @@ func NewValkeyBackend(client valkey.Client, opts ...BackendOption) (*valkeyBacke func 
loadScripts(scriptMapping map[string]**valkey.Lua) error { for scriptFile, scriptVar := range scriptMapping { - scriptContent, err := fs.ReadFile(Luas, "scripts/"+scriptFile) + scriptContent, err := fs.ReadFile(luaScripts, "scripts/"+scriptFile) if err != nil { return fmt.Errorf("reading Lua script %s: %w", scriptFile, err) } From f85e1960667ca85dbb0a62945f19135bee31b66b Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Tue, 25 Nov 2025 16:09:18 +0100 Subject: [PATCH 22/23] avoid shadowing keys name --- backend/valkey/queue.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/backend/valkey/queue.go b/backend/valkey/queue.go index 2e6c45d8..9ec9e902 100644 --- a/backend/valkey/queue.go +++ b/backend/valkey/queue.go @@ -134,9 +134,8 @@ func (q *taskQueue[T]) Enqueue(ctx context.Context, client valkey.Client, queue return err } - keys := q.Keys(queue) - - if err := enqueueCmd.Exec(ctx, client, []string{q.queueSetKey, keys.SetKey, keys.StreamKey}, []string{q.groupName, id, string(ds)}).Error(); err != nil { + queueStreamInfo := q.Keys(queue) + if err := enqueueCmd.Exec(ctx, client, []string{q.queueSetKey, queueStreamInfo.SetKey, queueStreamInfo.StreamKey}, []string{q.groupName, id, string(ds)}).Error(); err != nil { return fmt.Errorf("enqueueing task: %w", err) } From 9bcabf1dd958c14b3dd49109a17f462e47876935 Mon Sep 17 00:00:00 2001 From: Derk Schooltink Date: Thu, 18 Dec 2025 09:50:30 +0100 Subject: [PATCH 23/23] treat scores as float slice not a single float --- backend/valkey/diagnostics.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/valkey/diagnostics.go b/backend/valkey/diagnostics.go index c561a976..2a27e845 100644 --- a/backend/valkey/diagnostics.go +++ b/backend/valkey/diagnostics.go @@ -16,12 +16,12 @@ func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstance zrangeCmd := vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("+inf").Max("-inf").Byscore().Rev().Limit(0, int64(count)) if afterInstanceID != "" { afterSegmentID := instanceSegment(core.NewWorkflowInstance(afterInstanceID, afterExecutionID)) - scores, err := vb.client.Do(ctx, vb.client.B().Zmscore().Key(vb.keys.instancesByCreation()).Member(afterSegmentID).Build()).AsFloat64() + scores, err := vb.client.Do(ctx, vb.client.B().Zmscore().Key(vb.keys.instancesByCreation()).Member(afterSegmentID).Build()).AsFloatSlice() if err != nil { return nil, fmt.Errorf("getting instance score for %v: %w", afterSegmentID, err) } - if scores == 0 { + if len(scores) == 0 || scores[0] == 0 { vb.Options().Logger.Error("could not find instance %v", log.NamespaceKey+".valkey.afterInstanceID", afterInstanceID, log.NamespaceKey+".valkey.afterExecutionID", afterExecutionID, @@ -29,7 +29,7 @@ func (vb *valkeyBackend) GetWorkflowInstances(ctx context.Context, afterInstance return nil, nil } - zrangeCmd = vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("+inf").Max(fmt.Sprintf("(%f", scores)).Byscore().Rev().Limit(0, int64(count)) + zrangeCmd = vb.client.B().Zrange().Key(vb.keys.instancesByCreation()).Min("+inf").Max(fmt.Sprintf("(%f", scores[0])).Byscore().Rev().Limit(0, int64(count)) } instanceSegments, err := vb.client.Do(ctx, zrangeCmd.Build()).AsStrSlice()
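
Note on the ZMSCORE change above: ZMSCORE takes a variadic list of members and always replies with an array of scores, one entry per requested member, so the reply has to be decoded as a float slice even when only a single member is queried; the first element is then used as the exclusive upper bound ("(score") for the next descending ZRANGE page. The snippet below is a minimal usage sketch only, not part of the patch series: it assumes a diag.Backend value obtained the same way as in the diagnostics tests earlier in the series, and the helper name listAllInstances is hypothetical.

import (
	"context"
	"fmt"

	"github.com/cschleiden/go-workflows/diag"
)

// listAllInstances is a sketch: it pages through workflow instances five at a
// time. The last instance of each page is passed back as the
// afterInstanceID/afterExecutionID cursor, matching the pagination implemented
// by GetWorkflowInstances in the hunk above.
func listAllInstances(ctx context.Context, bd diag.Backend) error {
	var afterInstanceID, afterExecutionID string
	for {
		instances, err := bd.GetWorkflowInstances(ctx, afterInstanceID, afterExecutionID, 5)
		if err != nil {
			return err
		}
		if len(instances) == 0 {
			// No more pages (or the cursor instance was not found).
			return nil
		}
		for _, ref := range instances {
			fmt.Println(ref.Instance.InstanceID)
		}
		last := instances[len(instances)-1]
		afterInstanceID = last.Instance.InstanceID
		afterExecutionID = last.Instance.ExecutionID
	}
}

Because the cursor is resolved back to a creation-time score and the next page uses an exclusive bound, an instance created between two calls is either included in a later page or skipped only if it sorts above the cursor, which keeps paging stable without holding any server-side state.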