From 44ece7c62ba26777bcb339c993b091a658734553 Mon Sep 17 00:00:00 2001 From: Jemiah Westerman Date: Mon, 10 Nov 2025 15:06:44 -0800 Subject: [PATCH 1/6] Add schema support Adds support for per-entity-type schemas. Schemas are definied in configuration and applied as MongoDB data validations and indexes. The /api/v1/schemas/ endpoint exposes schemas. --- api/api_gomux.go | 42 +++ api/get_schemas_test.go | 134 ++++++++ config/config.go | 3 + go.mod | 1 + go.sum | 2 + schema/README.md | 146 +++++++++ schema/config.go | 86 +++++ schema/mongo_schema.go | 401 +++++++++++++++++++++++ schema/mongo_schema_test.go | 622 ++++++++++++++++++++++++++++++++++++ server/server.go | 31 ++ 10 files changed, 1468 insertions(+) create mode 100644 api/get_schemas_test.go create mode 100644 schema/README.md create mode 100644 schema/config.go create mode 100644 schema/mongo_schema.go create mode 100644 schema/mongo_schema_test.go diff --git a/api/api_gomux.go b/api/api_gomux.go index 584b377..948af15 100644 --- a/api/api_gomux.go +++ b/api/api_gomux.go @@ -28,6 +28,7 @@ import ( "github.com/square/etre/entity" "github.com/square/etre/metrics" "github.com/square/etre/query" + "github.com/square/etre/schema" ) func init() { @@ -65,6 +66,7 @@ type API struct { queryProfSampleRate int queryProfReportThreshold time.Duration srv *http.Server + schemas schema.Config } // NewAPI godoc @@ -94,6 +96,7 @@ func NewAPI(appCtx app.Context) *API { queryLatencySLA: queryLatencySLA, queryProfSampleRate: int(appCtx.Config.Metrics.QueryProfileSampleRate * 100), queryProfReportThreshold: queryProfReportThreshold, + schemas: appCtx.Config.Schemas, } mux := http.NewServeMux() @@ -120,6 +123,12 @@ func NewAPI(appCtx app.Context) *API { mux.Handle("GET "+etre.API_ROOT+"/entity/{type}/{id}/labels", api.requestWrapper(api.id(http.HandlerFunc(api.getLabelsHandler)))) mux.Handle("DELETE "+etre.API_ROOT+"/entity/{type}/{id}/labels/{label}", api.requestWrapper(api.id(http.HandlerFunc(api.deleteLabelHandler)))) + // 
///////////////////////////////////////////////////////////////////// + // Schemas + // ///////////////////////////////////////////////////////////////////// + mux.HandleFunc("GET "+etre.API_ROOT+"/schemas/{type}", api.getSchemasHandler) + mux.HandleFunc("GET "+etre.API_ROOT+"/schemas", api.getSchemasHandler) + // ///////////////////////////////////////////////////////////////////// // Metrics and status // ///////////////////////////////////////////////////////////////////// @@ -1173,6 +1182,39 @@ func (api *API) changesHandler(w http.ResponseWriter, r *http.Request) { } } +// getSchemasHandler godoc +// @Summary Get entity schemas +// @Description Return the schema for one entity of the given :type +// @ID getEntityHandler +// @Produce json +// @Param type path string true "Entity type" +// @Success 200 {object} schema.Config "OK" +// @Failure 400,404 {object} etre.Error +// @Router /schema/:type [get] +func (api *API) getSchemasHandler(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + // Get the schemas config for all schemas + cfg := api.schemas + + // If entity type is provided, make sure it is valid + entityType := r.PathValue("type") + if entityType != "" { + if err := api.validate.EntityType(entityType); err != nil { + log.Printf("Invalid entity type: '%s': request=%+v", entityType, r) + w.WriteHeader(http.StatusBadRequest) + json.NewEncoder(w).Encode(err) // validation error will encode nicely + return + } + // Make a config with only the requested entity type schema. + cfg.Entities = map[string]schema.EntitySchema{ + entityType: api.schemas.Entities[entityType], + } + } + + // Return the schema(s) + json.NewEncoder(w).Encode(cfg) +} + // Return error on read. Writes always return an etre.WriteResult by calling WriteResult. 
func (api *API) readError(rc *req, w http.ResponseWriter, err error) { api.systemMetrics.Inc(metrics.Error, 1) diff --git a/api/get_schemas_test.go b/api/get_schemas_test.go new file mode 100644 index 0000000..be72dd5 --- /dev/null +++ b/api/get_schemas_test.go @@ -0,0 +1,134 @@ +package api_test + +import ( + "net/http" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/square/etre" + "github.com/square/etre/schema" + "github.com/square/etre/test" + "github.com/square/etre/test/mock" +) + +func TestGetSchemas(t *testing.T) { + tests := []struct { + name string + entityType string + config schema.Config + expect schema.Config + }{ + { + name: "No schemas configured", + entityType: "nodes", + expect: schema.Config{Entities: map[string]schema.EntitySchema{"nodes": {}}}, // the server will return an empty schema for known types + }, + { + name: "No schemas configured - No type param", + }, + { + name: "Schema configured - type param present", + entityType: "nodes", + config: schema.Config{ + Entities: map[string]schema.EntitySchema{ + "nodes": { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "hostname", Type: "string", Required: true}, + {Name: "status", Type: "string", Required: false}, + }, + AdditionalProperties: true, + ValidationLevel: "strict", + }, + }, + }, + }, + expect: schema.Config{ + Entities: map[string]schema.EntitySchema{ + "nodes": { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "hostname", Type: "string", Required: true}, + {Name: "status", Type: "string", Required: false}, + }, + AdditionalProperties: true, + ValidationLevel: "strict", + }, + }, + }, + }, + }, + { + name: "Schema configured - no type param", + config: schema.Config{ + Entities: map[string]schema.EntitySchema{ + "nodes": { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "hostname", Type: "string", Required: true}, + }, + AdditionalProperties: false, + }, + }, + "racks": { + Schema: 
&schema.Schema{ + Fields: []schema.Field{ + {Name: "rack_id", Type: "string", Required: true}, + {Name: "datacenter", Type: "string", Required: true}, + }, + ValidationLevel: "moderate", + }, + }, + }, + }, + expect: schema.Config{ + Entities: map[string]schema.EntitySchema{ + "nodes": { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "hostname", Type: "string", Required: true}, + }, + AdditionalProperties: false, + }, + }, + "racks": { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "rack_id", Type: "string", Required: true}, + {Name: "datacenter", Type: "string", Required: true}, + }, + ValidationLevel: "moderate", + }, + }, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set up the server + config := defaultConfig + config.Schemas = tt.config + server := setup(t, config, mock.EntityStore{}) + defer server.ts.Close() + + // Set up the request URL + etreurl := server.url + etre.API_ROOT + "/schemas" + if tt.entityType != "" { + etreurl += "/" + tt.entityType + } + + // Make the HTTP call + var gotSchemas schema.Config + statusCode, err := test.MakeHTTPRequest("GET", etreurl, nil, &gotSchemas) + require.NoError(t, err) + assert.Equal(t, http.StatusOK, statusCode, "response status = %d, expected %d, url %s", statusCode, http.StatusOK, etreurl) + + // Make sure we got the expected schemas + assert.Equal(t, tt.expect, gotSchemas) + }) + } +} diff --git a/config/config.go b/config/config.go index 1951582..f08d077 100644 --- a/config/config.go +++ b/config/config.go @@ -9,6 +9,8 @@ import ( "strings" "gopkg.in/yaml.v2" + + "github.com/square/etre/schema" ) const ( @@ -120,6 +122,7 @@ type Config struct { CDC CDCConfig `yaml:"cdc"` Security SecurityConfig `yaml:"security"` Metrics MetricsConfig `yaml:"metrics"` + Schemas schema.Config `yaml:"schemas"` } func Redact(c Config) Config { diff --git a/go.mod b/go.mod index 291c773..cf0f0e7 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( 
github.com/go-test/deep v1.1.1 github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 github.com/gorilla/websocket v1.5.3 + github.com/pkg/errors v0.9.1 github.com/stretchr/testify v1.9.0 github.com/swaggo/http-swagger v1.3.4 github.com/swaggo/swag v1.16.4 diff --git a/go.sum b/go.sum index 3570107..07ad522 100644 --- a/go.sum +++ b/go.sum @@ -36,6 +36,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= diff --git a/schema/README.md b/schema/README.md new file mode 100644 index 0000000..014306f --- /dev/null +++ b/schema/README.md @@ -0,0 +1,146 @@ +# Overview +The schema validation framework for Etre uses a configuration language to define validations such as JSON schema declarations and indexes for entity types. Initially implemented in `block-etre`, it can transition to OSS Etre later. + +For full context of how we arrived at this approach, see the [Schema Validation SPADE](https://docs.google.com/document/d/19Tevi7lVJe43EAj2F9LFfN-PYkCtauwoFtcva2ynYT8/edit?tab=t.wl5rum3hp9oe). +# How It Works +## Basic Structure +### Syntax +**Syntax:** YAML format with `validations` at the top level. + +**Entities:** Define entity types under `entities` and use `schema` for basic JSON schema validation. 
+### Basic (JSON) Schema Components +#### Fields +`fields` contains the list of object fields that require some level of schema validation +- `name` (required): Field name. +- `type`: string, int, enum, object, bool. +- `required`: Field necessity for operations. +- `pattern`: Regex for field value validation. +- `enum`: Value list for fields. +- `case`: Control case enforcement. Overrides global configuration setting for `case`. +- `dependents`: List of fields that depend on this field. If this field is present, all dependent fields must also be present. + +#### Indexes +**Naming Conventions**: Although naming is managed automatically by block-etre, it follows existing conventions like `{{ "SL" or "IL" }}_{{ keys }}` to distinguish between standard and unique indexes. + +**Directives**: +- `keys` (required): Fields included in the index. +- `unique`: Boolean indicating uniqueness of the index. +- `direction`: Order of index keys; use 1 for ascending and -1 for descending. If not specified, defaults to ascending. + +#### Additional Properties +**Additional Properties**: This allows for flexible schema definitions where fields can be added dynamically without strict validation. It is useful for cases where the schema may evolve or when dealing with semi-structured data. + +#### Global Configuration +**Schema**: Specify global case in `validations` -> `config` -> `schema`. 
+### Example +```yaml +validations: + entities: + elasticache: + schema: + fields: + - name: app + type: string + required: true + - name: aws_account_id + type: string + required: true + - name: replication_group_arn + type: string + required: true + pattern: "^arn:aws:elasticache:[a-z]{2}(-[a-z0-9]{1,15}){2}:[0-9]{12}:replicationgroup:[a-zA-Z0-9-]+$" + - name: node_arn + required: true + type: string + pattern: "^arn:aws:elasticache:[a-z]{2}(-[a-z0-9]{1,15}){2}:[0-9]{12}:cluster:[a-zA-Z0-9-]+$" + - name: engine_version + type: string + - name: global_datastore + type: bool + required: true + - name: region + type: string + enum: ["us-east-1", "us-west-2"] + required: true + - name: replication_group_description + type: string + case: # This overrides the default case for this field. + strict: false + additional_properties: true + indexes: + - keys: [app] + - keys: [replication_group_arn] + - keys: [node_id] + unique: true + - keys: [app, env, region] + config: + schema: + case: + strict: true + type: lower +``` +# Custom Validations +## Extending The Schema +The `schema` validation type is built on DocumentDB’s JSON schema and indexing features. To extend it, modify the schema's in-memory struct and update the validator logic in `block-etre` +## Creating New Validations +To create custom validations, enhance the in-memory representation and adjust the YAML configuration. Implementation of the required validation logic will likely need to be done in OSS Etre, potentially as a `Plugin` or similar interface. + +### Go Code By Example +Here’s how you might represent the in-memory structure: + +1. 
**Modify the Structs**: + +```go +type Config struct { + Entities map[string]EntityValidations `yaml:"entities"` + // Global Configurations + Global struct { + SchemaValidationConfig struct { + Case Case `yaml:"case"` + } `yaml:"schema"` + } `yaml:"config"` +} + +type EntityValidations struct { + Schema *Schema `yaml:"schema,omitempty"` + + // This is an example of how to add additional validation types. + ApAdditionalProperties *ApAdditionalProperties `yaml:"ap_additional_properties,omitempty"` +} +… +type ApAdditionalProperties struct { + // Auto correct is a flag to indicate if the field should be auto-corrected. + AutoCorrect map[string]AutoCorrect `yaml:"auto_correct"` +} + +type AutoCorrect struct { + ... +} +``` + +2. **YAML Configuration**: + +Define how the validation appears in your YAML file: +```yaml +validations: + entities: + elasticache: + schema: + … + ap_additional_properties: + auto_correct: + provision_environment: + fill_null: helm_if_beta + config: + … +``` +## Performance Implications +Schema validation impacts write operations in terms of resource consumption and latency, but is optimized within MongoDB. Benchmark new validations to ensure they meet performance criteria.. +# Error Handling +Currently, InsertEntities and UpdateEntities in OSS Etre handle each entity separately, stopping on the first error encountered. Until batch processing is improved, handle discrete error scenarios within your application logic +# Adding and Deleting Indexes +Indexes are automatically managed based on the YAML configuration when the application starts. You can safely add numerous indexes for new entity types. However, exercise caution with existing entity types that have large data volumes, as DocumentDB limits the number of concurrent index build processes per collection to one. Both creating and deleting indexes initiate these processes. Therefore, avoid deleting a large index and creating a new one simultaneously in the same update. 
+ +In DocumentDB v5.0, all index builds are performed in the background, meaning they won't block other operations on the collection. Despite this, it's advisable to avoid index modifications on large collections during peak usage times to minimize performance impacts. +## Unique Index Considerations +Since all index creations occur in the background, duplicate key violations are detected asynchronously. This means that if you insert a document violating a unique index, the index creation request will appear to succeed initially, but the index build will eventually fail. Therefore, it's crucial to remove any duplicate documents before creating a unique index. diff --git a/schema/config.go b/schema/config.go new file mode 100644 index 0000000..e91dfc3 --- /dev/null +++ b/schema/config.go @@ -0,0 +1,86 @@ +package schema + +import "fmt" + +// Configuration structures are really higher level constructs that are meant to be decoupled from +// underlying MongoDB structures. This can be moved to a package separate from the MongoDB DDL processing +// if needed in the future e.g. if we want to support other databases especially in OSS etre. + +// Config represents the schema configurations for entities. +type Config struct { + // A map of existing entities to their validation configurations. + Entities map[string]EntitySchema `yaml:"entities"` + // Global configuration for validation implementations that are used. + // This is for any validation that is not specific to an entity. + Global Global `yaml:"config"` +} + +// EntitySchema represents the schema for a specific entity. +type EntitySchema struct { + Schema *Schema `yaml:"schema,omitempty"` +} + +// Schema represents the basic schema for an Entity. +// This includes JSON schema validation for entitie fields as well as database index definitions. 
+type Schema struct { + Fields []Field `yaml:"fields"` + AdditionalProperties bool `yaml:"additional_properties"` + Indexes []Index `yaml:"indexes"` + ValidationLevel string `yaml:"validation_level"` +} + +// Field represents a single field in the schema. +// Only the name is required. +type Field struct { + // Name is the name of the field in the schema. + Name string `yaml:"name"` + // Type is the type of the field. This can be string, int, bool, object, or enum. + Type string `yaml:"type"` + // Required indicates if the field is required in the schema. + Required bool `yaml:"required"` + // Pattern is a regex pattern that the field must match. This overrides any case rules. + Pattern string `yaml:"pattern"` + // Case is the case rules for the field. This can be strict or loose. Only case type + // "lower" is supported right now. This overrides global case rules. + Case *Case `yaml:"case,omitempty"` + // Enum is a list of valid values for the field. Only string is supported right now. + Enum []string `yaml:"enum,omitempty"` + // Dependents is a list of field names that must also be present if this field is present. + Dependents []string `yaml:"dependents,omitempty"` + // Description is a human-readable description of the field. + Description string `yaml:"description,omitempty"` +} + +// Case represents the case rules for a field. +type Case struct { + // Strict indicates if the case rules are "strict" or "loose". + Strict bool `yaml:"strict"` + // Type is the type of case. Only "lower" is supported right now. + Type string `yaml:"type"` +} + +// Index represents an index definition for a field or fields in the schema. +type Index struct { + // Keys is a list of field names to be indexed. + Keys []string `yaml:"keys"` + // Unique indicates if the index is unique. + Unique bool `yaml:"unique"` + // Direction contains information the sort order of the stored index for each given key. + // 1 for ascending, -1 for descending. If not specified, defaults to ascending. 
If set, + // the number of keys and directions must match. + Direction []int `yaml:"direction,omitempty"` + // Sparse indicates if the index is a sparse index. + Sparse bool `yaml:"sparse,omitempty"` +} + +func (i Index) String() string { + return fmt.Sprintf("Index{Keys: %v, Unique: %v, Direction: %v}", i.Keys, i.Unique, i.Direction) +} + +// Global represents the global configuration for validation implementations that are used. Each +// validation implementation implements its own set of global configurations. +type Global struct { + SchemaValidationConfig struct { + Case Case `yaml:"case"` + } `yaml:"schema"` +} diff --git a/schema/mongo_schema.go b/schema/mongo_schema.go new file mode 100644 index 0000000..be4467f --- /dev/null +++ b/schema/mongo_schema.go @@ -0,0 +1,401 @@ +package schema + +import ( + "context" + "fmt" + "log" + "strconv" + "strings" + + "github.com/pkg/errors" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" +) + +const ( + logDebugLevel = 10 + + // Validation level of "moderate" allows for existing invalid documents to be + // bypassed during updates. This should allow for more flexible schema migration + // rollouts compared to "strict". 
+ // See: https://docs.aws.amazon.com/documentdb/latest/developerguide/json-schema-validation.html + defaultJSONSchemaValidationLevel = "moderate" + defaultIndexDirection = 1 + + regexLowerCase = `^[a-z0-9\W_]+$` + regexRFC3339 = `^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{2}:\d{2}|Z)$` + regexInt64 = `^(-?(0|[1-9]\d{0,18})|922337203685477580[0-7]|-9223372036854775808)$` +) + +var ( + errNoKeysForIndex = errors.New("no keys defined for index") + errTooManyKeysForIndex = errors.New("too many keys defined for index; max is 30") + errKeysAndDirectionsDoNotMatch = errors.New("number of keys and directions do not match for index") + errInvalidIndexDirection = errors.New("invalid direction for key(s) in index; must be 1 or -1") + errIndexSparseAndUnique = errors.New("index cannot be both sparse and unique") + errInvalidFieldType = errors.New("unsupported field type; only string, int, bool, object are supported") + errEnumNotString = errors.New("enums are only supported for string types") + errFieldNameEmpty = errors.New("field name cannot be empty") +) + +// CreateOrUpdateMongoSchema creates or updates the MongoDB schema for the given entity. If the schema is nil or has +// empty fields, it removes the JSON schema validation. If the schema is not nil, it ensures that the indexes in the +// schem exists, and any indexes that are not in the schema are removed. Entity Collection creation is handled by the +// index creation process. We assume that any reasonaby designed schema should not solely rely on full collection scans. +func CreateOrUpdateMongoSchema(ctx context.Context, db *mongo.Database, config Config) error { + log.Printf("INFO: walking through entity validations") + for entity, validations := range config.Entities { + log.Printf("INFO: Creating or updating schema for %s", entity) + + // New entity collections are created upon the first attempt to create a unique index. If there are no + // indexes defined, there's no automated creation of the collection. 
+ + // If the schema is nil, we assume the entity owner wants to bypass Schema validation type. + // Disable JSON schema validation and move on. For safety, we don't touch the indexes in + // case it's not intended to be managed by `schema` type validation. + if validations.Schema == nil { + log.Printf("INFO: No `schema` type validation defined for %s. Validators associated with the entity collection will be removed", entity) + if err := disableMongoJSONValidation(ctx, db, entity); err != nil { + return err + } + + continue + } + + log.Printf("INFO: Ensuring the %d defined indexes for %s exists", len(validations.Schema.Indexes), entity) + if err := updateMongoIndexes(ctx, db, entity, validations.Schema.Indexes); err != nil { + return errors.Wrapf(err, "failed to ensure index creation for %s", entity) + } + + log.Printf("INFO: Updating JSON schema validation for %s", entity) + if err := updateMongoJSONValidation(ctx, db, entity, *validations.Schema, config.Global); err != nil { + return err + } + } + + log.Printf("INFO: Schema updated successfully") + return nil +} + +func disableMongoJSONValidation(ctx context.Context, db *mongo.Database, entity string) error { + command := bson.D{ + {Key: "collMod", Value: entity}, + {Key: "validator", Value: bson.D{}}, + {Key: "validationLevel", Value: "off"}, + } + if err := db.RunCommand(ctx, command).Err(); err != nil { + return errors.Wrapf(err, "failed to remove JSON schema validation for entity %s", entity) + } + return nil +} + +func updateMongoIndexes(ctx context.Context, db *mongo.Database, entity string, indexes []Index) error { + // If there are no indexes defined, we assume that this is a mistake and return an error. + // There should not be a reasonable use case for an entity that depends solely on full collection scans. 
+ if len(indexes) == 0 { + return fmt.Errorf("no indexes defined for %s; at least one index should be defined for any entity", entity) + } + + coll := db.Collection(entity) + + // Index deletion and creation both should be idempotent operations and should not cause + // any issues if multiple processes are trying to drop the same index. + createdIndexes := make(map[string]struct{}) + for _, index := range indexes { + idxName, err := createIndex(ctx, coll, index) + if err != nil { + if strings.Contains(errors.Cause(err).Error(), "Existing index build in progress on the same collection") { + log.Printf("WARN: Index build in progress for %s. Skipping rest of index creation because of database limit", coll.Name()) + break + } + + return err + } + createdIndexes[idxName] = struct{}{} + } + + log.Printf("INFO: Checking if any non-system indexes need to be dropped for %s", coll.Name()) + // If any indexes exist for the entity collection that are not in the schema, or + // are not system indexes, we assume the user wants to drop them or they are + // obsolete and should be removed. + existing, err := existingIndexes(ctx, coll) + if err != nil { + return errors.Wrapf(err, "failed to get existing indexes for %s", coll.Name()) + } + // NOTE: This is kind of a critical section that is not testable in the current + // code structure. The logic is quite simple at this point and reads much easier as is. + // But if we ever expand beyond a simple set check and string prefix check, we should + // consider refactoring this into a more testable structure. + for _, idx := range existing { + if _, ok := createdIndexes[idx]; !ok && !strings.HasPrefix(idx, "_") { + log.Printf("WARN: Index %s is not in the schema and not a system index. Will drop", idx) + // DocumentDB only allows one index build at a time for a collection, whether that is + // a create or drop. This means that if we try to drop an index while another index is + // being built, the database will return an error. 
For the interim, rely on testing for + // the declared configurations to ensure only one index change per collection. The + // handling of this could be subject to change depending on how we end up implementing + // the onboarding to block-etre from ods-etre. + // See: https://docs.aws.amazon.com/documentdb/latest/developerguide/functional-differences.html + err := coll.Indexes().DropOne(ctx, idx) + if err != nil { + if strings.Contains(err.Error(), "index not found") { + log.Printf("INFO: Index %s not found. It may have been dropped by another process", idx) + continue + } + + return errors.Wrapf(err, "failed to drop index %s for %s", idx, coll.Name()) + } + } + } + + return nil +} + +func createIndex(ctx context.Context, coll *mongo.Collection, index Index) (string, error) { + // Handle all index configuration errors up front. + if len(index.Keys) == 0 { + return "", errors.Wrapf(errNoKeysForIndex, "index: %s", index) + } + if len(index.Keys) > 30 { + return "", errors.Wrapf(errTooManyKeysForIndex, "index: %s", index) + } + if len(index.Direction) > 0 && len(index.Keys) != len(index.Direction) { + return "", errors.Wrapf(errKeysAndDirectionsDoNotMatch, "index: %s", index) + } + if index.Sparse && index.Unique { + return "", errors.Wrapf(errIndexSparseAndUnique, "index: %s", index) + + } + // Validate the direction values. 
+ for _, direction := range index.Direction { + if direction != 1 && direction != -1 { + return "", errors.Wrapf(errInvalidIndexDirection, "index: %s", index) + } + } + + log.Printf("INFO: Creating index %s ", index) + name := indexName(index) + bsonIndex := toBSONIndex(index) + mod := mongo.IndexModel{ + Keys: bsonIndex, + Options: options.Index().SetUnique(index.Unique).SetSparse(index.Sparse).SetName(name), + } + if _, err := coll.Indexes().CreateOne(ctx, mod); err != nil { + return "", errors.Wrapf(err, "failed to create index %s with name %s", index, name) + } + + log.Printf("INFO: Index %s created successfully if it did not exist before", name) + return name, nil +} + +func indexName(index Index) string { + if len(index.Keys) == 0 { + return "" + } + + // Legacy naming convention for ods-etre. + indexNamePrefix := "SL" + if index.Unique { + indexNamePrefix = "IL" + } else if index.Sparse { + indexNamePrefix = "SPARSE" + } + + // If no direction is specified, we don't need to add it to the index name. + if len(index.Direction) == 0 { + return fmt.Sprintf("%s_%s", indexNamePrefix, strings.Join(index.Keys, "_")) + } + + // If there are directions, we need to add them to the index name to ensure uniqueness. + direction := intSliceToString(index.Direction) + return fmt.Sprintf("%s_%s_%s", indexNamePrefix, strings.Join(index.Keys, "_"), strings.Join(direction, "_")) +} + +func intSliceToString(slice []int) []string { + stringSlice := make([]string, len(slice)) + for i, num := range slice { + stringSlice[i] = strconv.Itoa(num) + } + + return stringSlice +} + +func toBSONIndex(index Index) bson.D { + // The bson.D data structure is a slice of KV pairs of the key name and direction. + // It's important to preserve the order of keys since MongoDB uses the order of keys + // in the index model to determine how the index data is stored and retrieved. Therefore + // it's important to use bson.D instead of bson.M or any hashmap-like structure. 
+ bsonIndex := bson.D{} + // If no direction is specified, default to ascending order. + if len(index.Direction) == 0 { + for _, key := range index.Keys { + bsonIndex = append(bsonIndex, bson.E{Key: key, Value: defaultIndexDirection}) + } + } else { + for i, key := range index.Keys { + bsonIndex = append(bsonIndex, bson.E{Key: key, Value: index.Direction[i]}) + } + } + + return bsonIndex +} + +func existingIndexes(ctx context.Context, coll *mongo.Collection) ([]string, error) { + cursor, err := coll.Indexes().List(ctx) + if err != nil { + return nil, errors.Wrap(err, "Failed to list indexes") + } + defer cursor.Close(ctx) + + var ret []string + for cursor.Next(ctx) { + var index bson.M + if err := cursor.Decode(&index); err != nil { + return nil, errors.Wrap(err, "Failed to decode index description") + } + + log.Printf("INFO: Current exising index for %s: %+v", coll.Name(), index) + name, ok := index["name"].(string) + if !ok { + return nil, fmt.Errorf("Failed to get index name for index from entity %s: %+v", coll.Name(), index) + } + ret = append(ret, name) + } + if err := cursor.Err(); err != nil { + return nil, errors.Wrap(err, "Mongo cursor error when inspecting indexes") + } + + return ret, nil +} + +func updateMongoJSONValidation(ctx context.Context, db *mongo.Database, entity string, schema Schema, global Global) error { + log.Printf("INFO: Generating bson schema validator for %s", entity) + + // If there are no field defined, we assume the entity owner want to bypass JSON validation and only use + // `schema` to manage indexes. Disable JSON schema validation and move on. + if len(schema.Fields) == 0 { + log.Printf("INFO: No schema fields defined for %s. 
Validators associated with the entity collection will be removed", entity) + return disableMongoJSONValidation(ctx, db, entity) + } + + validator, err := BSONSchemaValidator(schema, global.SchemaValidationConfig.Case) + if err != nil { + return errors.Wrapf(err, "failed to create schema validator for %s", entity) + } + log.Printf("INFO: %s schema validator: %v", entity, validator) + + validationLevel := schema.ValidationLevel + switch validationLevel { + case "": + validationLevel = defaultJSONSchemaValidationLevel + case "moderate", "strict": + default: + return fmt.Errorf("invalid validation level %s for entity %s", validationLevel, entity) + } + + // The collMod call to update the validator is atomic and idempotent. Therefore it should not cause + // any issues if multiple processes are trying to update the same validator. + d := bson.D{ + {Key: "collMod", Value: entity}, + {Key: "validator", Value: validator}, + {Key: "validationLevel", Value: validationLevel}, + } + + log.Printf("INFO: Updating validator for %s with collMod command with BSON document %v", entity, d) + err = db.RunCommand(ctx, d).Err() + return errors.Wrapf(err, "failed to update validator for entity %s", entity) +} + +// BSONSchemaValidator converts a Schema into a BSON schema validator for MongoDB or DocumentDB. +func BSONSchemaValidator(schema Schema, globalCase Case) (bson.M, error) { + // Typically we don't want to initialize this since we don't know if there will be any + // required fields. However, the mongo client requires that the required cannot be nil. + requiredFields := make([]string, 0) + dependents := make(map[string][]string) + properties := bson.M{} + + for _, field := range schema.Fields { + if field.Name == "" { + return nil, errors.Wrapf(errFieldNameEmpty, "field of type %s has an empty name", field.Type) + } + + // Convert the field type to a BSON type. 
+		var bsonType string
+		switch field.Type {
+		case "string", "bool", "object":
+			bsonType = field.Type
+		case "int":
+			// In MongoDB, long is a 64-bit integer which is the more common standard for int
+			bsonType = "long"
+		case "datetime", "int-str", "bool-str":
+			bsonType = "string"
+		default:
+			return nil, errors.Wrapf(errInvalidFieldType, "field %s is of type %q", field.Name, field.Type)
+		}
+
+		// We only handle enums for strings right now.
+		if field.Type != "string" && field.Enum != nil {
+			return nil, errors.Wrapf(errEnumNotString, "field %s is of type %q", field.Name, field.Type)
+		}
+
+		// Build the field schema...
+		fieldSchema := bson.M{
+			"bsonType": bsonType,
+		}
+
+		// Determine casing rules
+		effectiveCase := field.Case
+		if effectiveCase == nil {
+			effectiveCase = &globalCase
+		}
+		// Apply pattern or casing rule
+		switch {
+		// Custom pattern overrides any casing rules.
+		case field.Pattern != "":
+			fieldSchema["pattern"] = field.Pattern
+		// Enum lists override any casing rules.
+		case field.Enum != nil && len(field.Enum) > 0:
+			fieldSchema["enum"] = field.Enum
+		// DocumentDB does not support the "format" keyword, therefore we use the pattern keyword as
+		// a workaround.
+		case field.Type == "datetime":
+			fieldSchema["pattern"] = regexRFC3339
+		// ES CLI does not currently support the use of an actual integer type, so we temporarily use an
+		// int string that conforms to long type in MongoDB, which is a 64-bit integer.
+		case field.Type == "int-str":
+			fieldSchema["pattern"] = regexInt64
+		// ES CLI does not currently support the use of an actual integer type, so we temporarily use a
+		// string that conforms to boolean value string representations.
+		case field.Type == "bool-str":
+			fieldSchema["enum"] = []string{"true", "false"}
+		// Apply casing rules since there are no prioritized schema validations.
+ case effectiveCase.Strict && bsonType == "string": + if effectiveCase.Type == "lower" { + fieldSchema["pattern"] = regexLowerCase + } + } + properties[field.Name] = fieldSchema + + if field.Required { + requiredFields = append(requiredFields, field.Name) + } + + if len(field.Dependents) > 0 { + dependents[field.Name] = append([]string{}, field.Dependents...) + } + } + + jsonSchema := bson.M{ + "bsonType": "object", + "properties": properties, + "required": requiredFields, + "additionalProperties": schema.AdditionalProperties, + } + + if len(dependents) > 0 { + jsonSchema["dependencies"] = dependents + } + + return bson.M{"$jsonSchema": jsonSchema}, nil +} diff --git a/schema/mongo_schema_test.go b/schema/mongo_schema_test.go new file mode 100644 index 0000000..7eef047 --- /dev/null +++ b/schema/mongo_schema_test.go @@ -0,0 +1,622 @@ +package schema + +import ( + "context" + "strconv" + "testing" + + "github.com/pkg/errors" + "github.com/stretchr/testify/assert" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" +) + +func TestBsonSchemaValidator(t *testing.T) { + tests := []struct { + name string + schema Schema + caseConfig Case + expected bson.M + }{ + { + name: "global caseConfig", + schema: Schema{ + Fields: []Field{ + {Name: "app", Type: "string", Required: true}, + {Name: "aws_account_id", Type: "string", Required: true}, + }, + AdditionalProperties: true, + }, + caseConfig: Case{Strict: true, Type: "lower"}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "app": bson.M{ + "bsonType": "string", + "pattern": regexLowerCase, + }, + "aws_account_id": bson.M{ + "bsonType": "string", + "pattern": regexLowerCase, + }, + }, + "required": []string{"app", "aws_account_id"}, + "additionalProperties": true, + }, + }, + }, + { + name: "custom patterns and enums", + schema: Schema{ + Fields: []Field{ + { + Name: "replication_group_arn", + Type: "string", + Pattern: 
"^arn:aws:elasticache:[a-z]{2}(-[a-z]{1,3}){3}:[0-9]{12}:replicationgroup:[a-zA-Z0-9-]+$", + }, + { + Name: "region", + Type: "string", + Enum: []string{"us-east-1", "us-west-2"}, + }, + }, + AdditionalProperties: false, + }, + caseConfig: Case{Strict: true, Type: "lower"}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "replication_group_arn": bson.M{ + "bsonType": "string", + "pattern": "^arn:aws:elasticache:[a-z]{2}(-[a-z]{1,3}){3}:[0-9]{12}:replicationgroup:[a-zA-Z0-9-]+$", + }, + "region": bson.M{ + "bsonType": "string", + "enum": []string{"us-east-1", "us-west-2"}, + }, + }, + "required": []string{}, + "additionalProperties": false, + }, + }, + }, + { + name: "override global caseConfig", + schema: Schema{ + Fields: []Field{ + { + Name: "userName", + Type: "string", + Case: &Case{Strict: true, Type: "lower"}, + }, + }, + AdditionalProperties: false, + }, + caseConfig: Case{Strict: true, Type: "upper"}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "userName": bson.M{ + "bsonType": "string", + "pattern": regexLowerCase, + }, + }, + "required": []string{}, + "additionalProperties": false, + }, + }, + }, + { + name: "global caseConfig wrong field type", + schema: Schema{ + Fields: []Field{ + { + Name: "global", + Type: "bool", + Case: &Case{Strict: true, Type: "lower"}, + }, + }, + AdditionalProperties: false, + }, + caseConfig: Case{Strict: true, Type: "upper"}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "global": bson.M{ + "bsonType": "bool", + }, + }, + "required": []string{}, + "additionalProperties": false, + }, + }, + }, + { + name: "global caseConfig without override", + schema: Schema{ + Fields: []Field{ + {Name: "email", Type: "string"}, + }, + AdditionalProperties: true, + }, + caseConfig: Case{Strict: true, Type: "lower"}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + 
"properties": bson.M{ + "email": bson.M{ + "bsonType": "string", + "pattern": regexLowerCase, + }, + }, + "required": []string{}, + "additionalProperties": true, + }, + }, + }, + { + name: "mixed field requirements", + schema: Schema{ + Fields: []Field{ + {Name: "name", Type: "string", Required: true}, + {Name: "age", Type: "int"}, + }, + AdditionalProperties: false, + }, + caseConfig: Case{Strict: false, Type: ""}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "name": bson.M{"bsonType": "string"}, + "age": bson.M{"bsonType": "long"}, + }, + "required": []string{"name"}, + "additionalProperties": false, + }, + }, + }, + { + name: "int type", + schema: Schema{ + Fields: []Field{ + { + Name: "age", + Type: "int", + }, + }, + AdditionalProperties: true, + }, + caseConfig: Case{Strict: true, Type: "lower"}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "age": bson.M{ + "bsonType": "long", + }, + }, + "required": []string{}, + "additionalProperties": true, + }, + }, + }, + { + name: "datetime type", + schema: Schema{ + Fields: []Field{ + { + Name: "created", + Type: "datetime", + }, + }, + AdditionalProperties: true, + }, + caseConfig: Case{Strict: true, Type: "lower"}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "created": bson.M{ + "bsonType": "string", + "pattern": regexRFC3339, + }, + }, + "required": []string{}, + "additionalProperties": true, + }, + }, + }, + { + name: "int-str type", + schema: Schema{ + Fields: []Field{ + { + Name: "shards", + Type: "int-str", + }, + }, + AdditionalProperties: true, + }, + caseConfig: Case{Strict: true, Type: "lower"}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "shards": bson.M{ + "bsonType": "string", + "pattern": regexInt64, + }, + }, + "required": []string{}, + "additionalProperties": true, + }, + }, + }, + { + name: "bool-str 
type", + schema: Schema{ + Fields: []Field{ + { + Name: "is_global", + Type: "bool-str", + }, + }, + AdditionalProperties: true, + }, + caseConfig: Case{Strict: true, Type: "lower"}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "is_global": bson.M{ + "bsonType": "string", + "enum": []string{"true", "false"}, + }, + }, + "required": []string{}, + "additionalProperties": true, + }, + }, + }, + { + name: "with dependents", + schema: Schema{ + Fields: []Field{ + { + Name: "cluster_endpoint", + Type: "string", + Dependents: []string{"shard", "slots"}, + }, + }, + AdditionalProperties: true, + }, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "cluster_endpoint": bson.M{ + "bsonType": "string", + }, + }, + "dependencies": map[string][]string{ + "cluster_endpoint": {"shard", "slots"}, + }, + "required": []string{}, + "additionalProperties": true, + }, + }, + }, + { + name: "circular dependents", + schema: Schema{ + Fields: []Field{ + { + Name: "cluster_endpoint", + Type: "string", + Dependents: []string{"shard"}, + }, + { + Name: "shard", + Type: "int", + Dependents: []string{"cluster_endpoint"}, + }, + }, + AdditionalProperties: true, + }, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{ + "cluster_endpoint": bson.M{ + "bsonType": "string", + }, + "shard": bson.M{ + "bsonType": "long", + }, + }, + "dependencies": map[string][]string{ + "cluster_endpoint": {"shard"}, + "shard": {"cluster_endpoint"}, + }, + "required": []string{}, + "additionalProperties": true, + }, + }, + }, + { + name: "empty schema", + schema: Schema{ + Fields: []Field{}, + AdditionalProperties: true, + }, + caseConfig: Case{Strict: false, Type: ""}, + expected: bson.M{ + "$jsonSchema": bson.M{ + "bsonType": "object", + "properties": bson.M{}, + "required": []string{}, + "additionalProperties": true, + }, + }, + }, + } + + for _, test := range tests { + 
t.Run(test.name, func(t *testing.T) { + result, err := BSONSchemaValidator(test.schema, test.caseConfig) + assert.NoError(t, err) + assert.Equal(t, test.expected, result) + }) + } + + negativeTests := []struct { + name string + schema Schema + caseConfig Case + expected error + }{ + { + name: "bad field type", + schema: Schema{ + Fields: []Field{ + {Name: "unknown", Type: "undefinedType"}, + }, + AdditionalProperties: true, + }, + expected: errInvalidFieldType, + }, + { + name: "non-string enum", + schema: Schema{ + Fields: []Field{ + {Name: "status", Type: "int", Enum: []string{"active", "inactive", "unknown"}}, + }, + AdditionalProperties: true, + }, + expected: errEnumNotString, + }, + { + name: "empty field name", + schema: Schema{ + Fields: []Field{ + {Name: "", Type: "int"}, + }, + AdditionalProperties: true, + }, + expected: errFieldNameEmpty, + }, + } + + for _, test := range negativeTests { + t.Run(test.name, func(t *testing.T) { + _, err := BSONSchemaValidator(test.schema, test.caseConfig) + assert.Equal(t, test.expected, errors.Cause(err)) + }) + } +} + +func TestCreateIndex_NegativeTests(t *testing.T) { + tests := []struct { + name string + index Index + expected error + }{ + { + name: "no keys", + index: Index{ + Keys: []string{}, + }, + expected: errNoKeysForIndex, + }, + { + name: "too many keys", + index: Index{ + Keys: func() []string { + fields := make([]string, 31) + for i := 0; i < 31; i++ { + fields[i] = "field" + strconv.Itoa(i) + } + return fields + }(), + }, + expected: errTooManyKeysForIndex, + }, + { + name: "invalid index direction", + index: Index{ + Keys: []string{"field1", "field2"}, + Direction: []int{1, 2}, + }, + expected: errInvalidIndexDirection, + }, + { + name: "direction length mismatch", + index: Index{ + Keys: []string{"field1", "field2"}, + Direction: []int{1}, + }, + expected: errKeysAndDirectionsDoNotMatch, + }, + { + name: "unique and sparse", + index: Index{ + Keys: []string{"field1", "field2"}, + Unique: true, + Sparse: 
true, + }, + expected: errIndexSparseAndUnique, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx := context.Background() + coll := &mongo.Collection{} + _, err := createIndex(ctx, coll, test.index) + assert.Equal(t, test.expected, errors.Cause(err)) + }) + } +} + +func TestIndexName(t *testing.T) { + tests := []struct { + name string + index Index + expected string + }{ + { + name: "unique single field index", + index: Index{ + Keys: []string{"node_arn"}, + Unique: true, + }, + expected: "IL_node_arn", + }, + { + name: "standard single field index", + index: Index{ + Keys: []string{"app"}, + }, + expected: "SL_app", + }, + { + name: "unique compound index", + index: Index{ + Keys: []string{"app", "region", "env"}, + Unique: true, + }, + expected: "IL_app_region_env", + }, + { + name: "standard compound index", + index: Index{ + Keys: []string{"app", "aws_account_id"}, + }, + expected: "SL_app_aws_account_id", + }, + { + name: "compound index with direction", + index: Index{ + Keys: []string{"app", "region", "env"}, + Direction: []int{1, -1, 1}, + }, + expected: "SL_app_region_env_1_-1_1", + }, + { + name: "sparse index", + index: Index{ + Keys: []string{"is_rare"}, + Sparse: true, + }, + expected: "SPARSE_is_rare", + }, + { + name: "empty index", + index: Index{}, + expected: "", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + got := indexName(test.index) + assert.Equal(t, test.expected, got) + }) + } +} + +func TestToBSONIndex(t *testing.T) { + tests := []struct { + name string + input Index + expected bson.D + }{ + { + name: "single key default direction", + input: Index{ + Keys: []string{"field1"}, + Unique: false, + }, + expected: bson.D{ + {Key: "field1", Value: defaultIndexDirection}, + }, + }, + { + name: "single key with direction specified", + input: Index{ + Keys: []string{"field1"}, + Direction: []int{-1}, + }, + expected: bson.D{ + {Key: "field1", Value: -1}, + }, + }, + { + 
name: "compound index default direction", + input: Index{ + Keys: []string{"field1", "field2"}, + Unique: true, + }, + expected: bson.D{ + {Key: "field1", Value: defaultIndexDirection}, + {Key: "field2", Value: defaultIndexDirection}, + }, + }, + { + name: "compound index with mixed directions", + input: Index{ + Keys: []string{"field1", "field2", "field3"}, + Direction: []int{1, -1, 1}, + }, + expected: bson.D{ + {Key: "field1", Value: 1}, + {Key: "field2", Value: -1}, + {Key: "field3", Value: 1}, + }, + }, + { + name: "compound index same direction", + input: Index{ + Keys: []string{"field1", "field2"}, + Direction: []int{1, 1}, + }, + expected: bson.D{ + {Key: "field1", Value: 1}, + {Key: "field2", Value: 1}, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + result := toBSONIndex(test.input) + assert.Equal(t, test.expected, result) + }) + } +} diff --git a/server/server.go b/server/server.go index 1737475..b6aaada 100644 --- a/server/server.go +++ b/server/server.go @@ -6,8 +6,10 @@ import ( "context" "fmt" "log" + "math/rand" "time" + "github.com/pkg/errors" "go.mongodb.org/mongo-driver/v2/mongo" "go.mongodb.org/mongo-driver/v2/mongo/options" @@ -20,6 +22,7 @@ import ( "github.com/square/etre/config" "github.com/square/etre/entity" "github.com/square/etre/metrics" + "github.com/square/etre/schema" ) type Server struct { @@ -102,6 +105,14 @@ func (s *Server) Boot(configFile string) error { s.appCtx.EntityStore = entity.NewStore(coll, s.appCtx.CDCStore, cfg.Entity) s.appCtx.EntityValidator = entity.NewValidator(cfg.Entity.Types) + // ////////////////////////////////////////////////////////////////////// + // Entity Schemas + // ////////////////////////////////////////////////////////////////////// + err = s.runSchemaDDL() + if err != nil { + return fmt.Errorf("cannot run schema DDL: %w", err) + } + // ////////////////////////////////////////////////////////////////////// // Auth // 
//////////////////////////////////////////////////////////////////////
@@ -240,6 +251,26 @@ func (s *Server) connectToDatasource(ds config.DatasourceConfig, client *mongo.C
 	}
 }
 
+func (s *Server) runSchemaDDL() error {
+	// We need to retry because the collMod calls that are required to update the schema may error
+	// if there are simultaneous writes to the collection. This is a known behavior with MongoDB.
+	// However it's safe to retry because 1) the schema is idempotent and 2) the update is very fast
+	// since it's just updating metadata 3) index updates are also idempotent and fast since DocumentDB
+	// defaults all index builds to background as of v5.0.
+	db := s.mainDbClient.Database(s.appCtx.Config.Datasource.Database)
+	var err error
+	try := 0
+	for ; try < 5; try++ {
+		if err = schema.CreateOrUpdateMongoSchema(context.Background(), db, s.appCtx.Config.Schemas); err == nil {
+			return nil
+		}
+		// Sleep 2-4 seconds before retrying. Updates are very fast, so we don't need long waits.
+		jitter := time.Duration(rand.Intn(2000)) * time.Millisecond
+		time.Sleep(2*time.Second + jitter)
+	}
+	return errors.Wrapf(err, "failed to run DDL after %d tries", try)
+}
+
 func MapConfigACLRoles(aclRoles []config.ACL) ([]auth.ACL, error) {
 	acls := make([]auth.ACL, len(aclRoles))
 	for i, acl := range aclRoles {

From 988756a3fbe2d2d8936c3eb5d0fa7d4401235662 Mon Sep 17 00:00:00 2001
From: Jemiah Westerman
Date: Mon, 10 Nov 2025 15:18:54 -0800
Subject: [PATCH 2/6] Add entity-types endpoint to fetch all entity types

---
 api/api_gomux.go         |  18 +++++++
 api/entity_types_test.go | 104 +++++++++++++++++++++++++++++++++++++++
 entity/validate.go       |   5 ++
 3 files changed, 127 insertions(+)
 create mode 100644 api/entity_types_test.go

diff --git a/api/api_gomux.go b/api/api_gomux.go
index 948af15..54c4959 100644
--- a/api/api_gomux.go
+++ b/api/api_gomux.go
@@ -129,6 +129,11 @@ func NewAPI(appCtx app.Context) *API {
 	mux.HandleFunc("GET "+etre.API_ROOT+"/schemas/{type}", api.getSchemasHandler)
mux.HandleFunc("GET "+etre.API_ROOT+"/schemas", api.getSchemasHandler) + // ///////////////////////////////////////////////////////////////////// + // Entity Types + // ///////////////////////////////////////////////////////////////////// + mux.HandleFunc("GET "+etre.API_ROOT+"/entity-types", api.getEntityTypesHandler) + // ///////////////////////////////////////////////////////////////////// // Metrics and status // ///////////////////////////////////////////////////////////////////// @@ -1215,6 +1220,19 @@ func (api *API) getSchemasHandler(w http.ResponseWriter, r *http.Request) { json.NewEncoder(w).Encode(cfg) } +// getEntityTypesHandler godoc +// @Summary Get supported entity types +// @Description Return a list of all supported entity types +// @ID getEntityTypesHandler +// @Produce json +// @Success 200 {array} string "List of entity types" +// @Router /entity-types [get] +func (api *API) getEntityTypesHandler(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + entityTypes := api.validate.EntityTypes() + json.NewEncoder(w).Encode(entityTypes) +} + // Return error on read. Writes always return an etre.WriteResult by calling WriteResult. 
func (api *API) readError(rc *req, w http.ResponseWriter, err error) { api.systemMetrics.Inc(metrics.Error, 1) diff --git a/api/entity_types_test.go b/api/entity_types_test.go new file mode 100644 index 0000000..b204466 --- /dev/null +++ b/api/entity_types_test.go @@ -0,0 +1,104 @@ +package api_test + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/square/etre" + "github.com/square/etre/api" + "github.com/square/etre/app" + "github.com/square/etre/auth" + "github.com/square/etre/config" + "github.com/square/etre/entity" + "github.com/square/etre/metrics" + srv "github.com/square/etre/server" + "github.com/square/etre/test" + "github.com/square/etre/test/mock" +) + +func TestGetEntityTypes(t *testing.T) { + tests := []struct { + name string + entityTypes []string + expectTypes []string + }{ + { + name: "Single entity type", + entityTypes: []string{"nodes"}, + expectTypes: []string{"nodes"}, + }, + { + name: "Multiple entity types", + entityTypes: []string{"nodes", "racks", "hosts"}, + expectTypes: []string{"nodes", "racks", "hosts"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set up the server with custom entity types + config := defaultConfig + server := setupWithValidator(t, config, mock.EntityStore{}, entity.NewValidator(tt.entityTypes)) + defer server.ts.Close() + + // Set up the request URL + etreurl := server.url + etre.API_ROOT + "/entity-types" + + // Make the HTTP call + var gotTypes []string + statusCode, err := test.MakeHTTPRequest("GET", etreurl, nil, &gotTypes) + require.NoError(t, err) + assert.Equal(t, http.StatusOK, statusCode, "response status = %d, expected %d, url %s", statusCode, http.StatusOK, etreurl) + + // Make sure we got the expected entity types + assert.Equal(t, tt.expectTypes, gotTypes) + }) + } +} + +func setupWithValidator(t *testing.T, cfg config.Config, store 
mock.EntityStore, validator entity.Validator) *server { + etre.DebugEnabled = true + + server := &server{ + store: store, + cfg: cfg, + auth: &mock.AuthRecorder{}, + cdcStore: &mock.CDCStore{}, + streamerFactory: &mock.StreamerFactory{}, + metricsrec: mock.NewMetricsRecorder(), + sysmetrics: mock.NewMetricsRecorder(), + } + + acls, err := srv.MapConfigACLRoles(cfg.Security.ACL) + require.NoError(t, err, "invalid Config.ACL: %s", err) + + ms := metrics.NewMemoryStore() + mf := metrics.GroupFactory{Store: ms} + sm := metrics.NewSystemMetrics() + + appCtx := app.Context{ + Config: server.cfg, + EntityStore: server.store, + EntityValidator: validator, + Auth: auth.NewManager(acls, server.auth), + MetricsStore: ms, + MetricsFactory: mock.NewMetricsFactory(mf, server.metricsrec), + StreamerFactory: server.streamerFactory, + SystemMetrics: mock.NewSystemMetrics(sm, server.sysmetrics), + } + server.api = api.NewAPI(appCtx) + server.ts = httptest.NewServer(server.api) + + u, err := url.Parse(server.ts.URL) + require.NoError(t, err) + + server.url = fmt.Sprintf("http://%s", u.Host) + + return server +} diff --git a/entity/validate.go b/entity/validate.go index b572450..f5142a1 100644 --- a/entity/validate.go +++ b/entity/validate.go @@ -29,6 +29,7 @@ type Validator interface { Entities([]etre.Entity, byte) error WriteOp(WriteOp) error DeleteLabel(string) error + EntityTypes() []string } type validator struct { @@ -152,3 +153,7 @@ func (v validator) DeleteLabel(label string) error { } return nil } + +func (v validator) EntityTypes() []string { + return v.entityTypes +} From c766eb940a45bda95fef3ebfa60954f5f3f39fef Mon Sep 17 00:00:00 2001 From: Jemiah Westerman Date: Mon, 10 Nov 2025 15:45:47 -0800 Subject: [PATCH 3/6] Add schema integration tests --- schema/mongo_integration_test.go | 504 +++++++++++++++++++++++++++++++ schema/mongo_schema.go | 6 +- schema/mongo_schema_test.go | 11 - 3 files changed, 509 insertions(+), 12 deletions(-) create mode 100644 
schema/mongo_integration_test.go diff --git a/schema/mongo_integration_test.go b/schema/mongo_integration_test.go new file mode 100644 index 0000000..3f4a1cf --- /dev/null +++ b/schema/mongo_integration_test.go @@ -0,0 +1,504 @@ +package schema_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + + "github.com/square/etre/schema" + "github.com/square/etre/test" +) + +const testEntityType = "schema_test" + +func TestCreateOrUpdateMongoSchema_Integration(t *testing.T) { + // Setup: Connect to MongoDB + client, colls, err := test.DbCollections([]string{testEntityType}) + require.NoError(t, err, "failed to connect to MongoDB") + defer client.Disconnect(context.Background()) + + db := client.Database("etre_test") + coll := colls[testEntityType] + + // Clean up before and after test + cleanup := func() { + // Drop the collection to start fresh + err := coll.Drop(context.Background()) + if err != nil { + t.Logf("Warning: failed to drop collection: %v", err) + } + } + cleanup() + defer cleanup() + + t.Run("create indexes and validation", func(t *testing.T) { + // Define a schema with indexes and field validations + config := schema.Config{ + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "hostname", Type: "string", Required: true}, + {Name: "region", Type: "string", Required: true, Enum: []string{"us-east-1", "us-west-2"}}, + {Name: "port", Type: "int", Required: false}, + {Name: "active", Type: "bool"}, + }, + AdditionalProperties: true, // Must be true to allow MongoDB's auto-generated _id field + ValidationLevel: "strict", + Indexes: []schema.Index{ + { + Keys: []string{"hostname"}, + Unique: true, + }, + { + Keys: []string{"region", "hostname"}, + }, + }, + }, + }, + }, + } + + // Apply the schema to MongoDB + err := 
schema.CreateOrUpdateMongoSchema(context.Background(), db, config) + require.NoError(t, err, "failed to create or update schema") + + // Verify indexes were created + indexes, err := getIndexes(context.Background(), coll) + require.NoError(t, err, "failed to get indexes") + + // Should have: _id_ (default), IL_hostname, SL_region_hostname + assert.Len(t, indexes, 3, "expected 3 indexes: _id_, IL_hostname, SL_region_hostname") + assert.Contains(t, indexes, "_id_", "should have default _id_ index") + assert.Contains(t, indexes, "IL_hostname", "should have unique hostname index") + assert.Contains(t, indexes, "SL_region_hostname", "should have compound region_hostname index") + + // Verify validation rules by attempting to insert documents + ctx := context.Background() + + // Valid document should succeed + validDoc := bson.M{ + "hostname": "server01", + "region": "us-east-1", + "port": int64(8080), + "active": true, + } + _, err = coll.InsertOne(ctx, validDoc) + assert.NoError(t, err, "valid document should be inserted") + + // Invalid document: missing required field + invalidDoc1 := bson.M{ + "region": "us-west-2", + "port": int64(8080), + } + _, err = coll.InsertOne(ctx, invalidDoc1) + assert.Error(t, err, "document missing required field should fail") + + // Invalid document: enum violation + invalidDoc2 := bson.M{ + "hostname": "server02", + "region": "eu-west-1", // Not in enum + "port": int64(8080), + } + _, err = coll.InsertOne(ctx, invalidDoc2) + assert.Error(t, err, "document with invalid enum value should fail") + + // Invalid document: duplicate unique key + duplicateDoc := bson.M{ + "hostname": "server01", // Duplicate + "region": "us-west-2", + } + _, err = coll.InsertOne(ctx, duplicateDoc) + assert.Error(t, err, "duplicate unique key should fail") + }) + + t.Run("update schema - add index and modify validation", func(t *testing.T) { + // First, apply initial schema + initialConfig := schema.Config{ + Entities: map[string]schema.EntitySchema{ + 
testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "name", Type: "string", Required: true}, + {Name: "status", Type: "string"}, + }, + AdditionalProperties: true, + ValidationLevel: "moderate", + Indexes: []schema.Index{ + {Keys: []string{"name"}, Unique: true}, + }, + }, + }, + }, + } + + err := schema.CreateOrUpdateMongoSchema(context.Background(), db, initialConfig) + require.NoError(t, err) + + // Verify initial state + indexes, err := getIndexes(context.Background(), coll) + require.NoError(t, err) + assert.Len(t, indexes, 2, "should have _id_ and IL_name") + + // Update schema: add new index and field + updatedConfig := schema.Config{ + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "name", Type: "string", Required: true}, + {Name: "status", Type: "string"}, + {Name: "category", Type: "string", Required: true}, + }, + AdditionalProperties: true, + ValidationLevel: "moderate", + Indexes: []schema.Index{ + {Keys: []string{"name"}, Unique: true}, + {Keys: []string{"category"}}, + }, + }, + }, + }, + } + + err = schema.CreateOrUpdateMongoSchema(context.Background(), db, updatedConfig) + require.NoError(t, err) + + // Verify updated state + indexes, err = getIndexes(context.Background(), coll) + require.NoError(t, err) + assert.Len(t, indexes, 3, "should have _id_, IL_name, and SL_category") + assert.Contains(t, indexes, "SL_category", "should have new category index") + + // Insert document with new required field + doc := bson.M{ + "name": "test1", + "status": "active", + "category": "server", + } + _, err = coll.InsertOne(context.Background(), doc) + assert.NoError(t, err, "document with new required field should be inserted") + + // Missing new required field should fail + invalidDoc := bson.M{ + "name": "test2", + "status": "active", + } + _, err = coll.InsertOne(context.Background(), invalidDoc) + assert.Error(t, err, "document missing new required field 
should fail") + }) + + t.Run("remove obsolete indexes", func(t *testing.T) { + // Create schema with multiple indexes + config1 := schema.Config{ + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "field1", Type: "string"}, + {Name: "field2", Type: "string"}, + {Name: "field3", Type: "string"}, + }, + AdditionalProperties: true, + Indexes: []schema.Index{ + {Keys: []string{"field1"}}, + {Keys: []string{"field2"}}, + {Keys: []string{"field3"}}, + }, + }, + }, + }, + } + + err := schema.CreateOrUpdateMongoSchema(context.Background(), db, config1) + require.NoError(t, err) + + indexes, err := getIndexes(context.Background(), coll) + require.NoError(t, err) + assert.Len(t, indexes, 4, "should have 4 indexes") + + // Update schema to remove field3 index + config2 := schema.Config{ + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "field1", Type: "string"}, + {Name: "field2", Type: "string"}, + }, + AdditionalProperties: true, + Indexes: []schema.Index{ + {Keys: []string{"field1"}}, + {Keys: []string{"field2"}}, + }, + }, + }, + }, + } + + err = schema.CreateOrUpdateMongoSchema(context.Background(), db, config2) + require.NoError(t, err) + + indexes, err = getIndexes(context.Background(), coll) + require.NoError(t, err) + assert.Len(t, indexes, 3, "should have 3 indexes (field3 removed)") + assert.NotContains(t, indexes, "SL_field3", "field3 index should be removed") + }) + + t.Run("compound index with direction", func(t *testing.T) { + config := schema.Config{ + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "timestamp", Type: "int"}, + {Name: "user_id", Type: "string"}, + }, + AdditionalProperties: true, + Indexes: []schema.Index{ + { + Keys: []string{"user_id", "timestamp"}, + Direction: []int{1, -1}, // ascending user_id, descending timestamp + 
}, + }, + }, + }, + }, + } + + err := schema.CreateOrUpdateMongoSchema(context.Background(), db, config) + require.NoError(t, err) + + indexes, err := getIndexes(context.Background(), coll) + require.NoError(t, err) + assert.Contains(t, indexes, "SL_user_id_timestamp_1_-1", "should have compound index with direction") + }) + + t.Run("sparse index", func(t *testing.T) { + config := schema.Config{ + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "optional_field", Type: "string"}, + }, + AdditionalProperties: true, + Indexes: []schema.Index{ + { + Keys: []string{"optional_field"}, + Sparse: true, + }, + }, + }, + }, + }, + } + + err := schema.CreateOrUpdateMongoSchema(context.Background(), db, config) + require.NoError(t, err) + + indexes, err := getIndexes(context.Background(), coll) + require.NoError(t, err) + assert.Contains(t, indexes, "SPARSE_optional_field", "should have sparse index") + + // Verify sparse index allows multiple documents without the field + ctx := context.Background() + _, err = coll.InsertOne(ctx, bson.M{"other_field": "value1"}) + assert.NoError(t, err) + _, err = coll.InsertOne(ctx, bson.M{"other_field": "value2"}) + assert.NoError(t, err, "sparse index should allow multiple docs without indexed field") + }) + + t.Run("disable validation when schema is nil", func(t *testing.T) { + // First apply a schema with validation + configWithValidation := schema.Config{ + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "required_field", Type: "string", Required: true}, + }, + AdditionalProperties: false, + Indexes: []schema.Index{ + {Keys: []string{"required_field"}}, + }, + }, + }, + }, + } + + err := schema.CreateOrUpdateMongoSchema(context.Background(), db, configWithValidation) + require.NoError(t, err) + + // Verify validation is enforced + _, err = coll.InsertOne(context.Background(), 
bson.M{"other_field": "value"}) + assert.Error(t, err, "validation should be enforced") + + // Now disable validation by setting schema to nil + configWithoutValidation := schema.Config{ + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: nil, + }, + }, + } + + err = schema.CreateOrUpdateMongoSchema(context.Background(), db, configWithoutValidation) + require.NoError(t, err) + + // Verify validation is disabled + _, err = coll.InsertOne(context.Background(), bson.M{"any_field": "any_value"}) + assert.NoError(t, err, "validation should be disabled") + }) + + t.Run("case validation with global config", func(t *testing.T) { + config := schema.Config{ + Global: schema.Global{ + SchemaValidationConfig: struct { + Case schema.Case `yaml:"case"` + }{ + Case: schema.Case{Strict: true, Type: "lower"}, + }, + }, + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + {Name: "lowercase_field", Type: "string", Required: true}, + }, + AdditionalProperties: true, // Changed to true to allow _id field + Indexes: []schema.Index{ + {Keys: []string{"lowercase_field"}}, + }, + }, + }, + }, + } + + err := schema.CreateOrUpdateMongoSchema(context.Background(), db, config) + require.NoError(t, err) + + ctx := context.Background() + + // Valid lowercase value + _, err = coll.InsertOne(ctx, bson.M{"lowercase_field": "alllowercase123"}) + assert.NoError(t, err, "lowercase value should be accepted") + + // Invalid uppercase value + _, err = coll.InsertOne(ctx, bson.M{"lowercase_field": "HasUpperCase"}) + assert.Error(t, err, "uppercase characters should fail validation") + }) + + t.Run("field with pattern validation", func(t *testing.T) { + config := schema.Config{ + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + { + Name: "email", + Type: "string", + Pattern: "^[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}$", + }, + }, + AdditionalProperties: 
true, + Indexes: []schema.Index{ + {Keys: []string{"email"}}, + }, + }, + }, + }, + } + + err := schema.CreateOrUpdateMongoSchema(context.Background(), db, config) + require.NoError(t, err) + + ctx := context.Background() + + // Valid email + _, err = coll.InsertOne(ctx, bson.M{"email": "user@example.com"}) + assert.NoError(t, err, "valid email should be accepted") + + // Invalid email + _, err = coll.InsertOne(ctx, bson.M{"email": "not-an-email"}) + assert.Error(t, err, "invalid email should fail validation") + }) + + t.Run("field dependencies", func(t *testing.T) { + config := schema.Config{ + Entities: map[string]schema.EntitySchema{ + testEntityType: { + Schema: &schema.Schema{ + Fields: []schema.Field{ + { + Name: "cluster_mode", + Type: "string", + Dependents: []string{"shard_count", "replica_count"}, + }, + {Name: "shard_count", Type: "int"}, + {Name: "replica_count", Type: "int"}, + }, + AdditionalProperties: true, + Indexes: []schema.Index{ + {Keys: []string{"cluster_mode"}}, + }, + }, + }, + }, + } + + err := schema.CreateOrUpdateMongoSchema(context.Background(), db, config) + require.NoError(t, err) + + ctx := context.Background() + + // Document with cluster_mode must have dependent fields + _, err = coll.InsertOne(ctx, bson.M{ + "cluster_mode": "enabled", + "shard_count": int64(3), + "replica_count": int64(2), + }) + assert.NoError(t, err, "document with all dependent fields should be accepted") + + // Document with cluster_mode but missing dependent fields should fail + _, err = coll.InsertOne(ctx, bson.M{ + "cluster_mode": "enabled", + "shard_count": int64(3), + // missing replica_count + }) + assert.Error(t, err, "document missing dependent field should fail") + + // Document without cluster_mode can omit dependent fields + _, err = coll.InsertOne(ctx, bson.M{ + "other_field": "value", + }) + assert.NoError(t, err, "document without parent field can omit dependent fields") + }) +} + +// Helper function to get list of index names +func getIndexes(ctx 
context.Context, coll *mongo.Collection) ([]string, error) { + cursor, err := coll.Indexes().List(ctx) + if err != nil { + return nil, err + } + defer cursor.Close(ctx) + + var indexes []string + for cursor.Next(ctx) { + var index bson.M + if err := cursor.Decode(&index); err != nil { + return nil, err + } + name, ok := index["name"].(string) + if !ok { + continue + } + indexes = append(indexes, name) + } + return indexes, cursor.Err() +} diff --git a/schema/mongo_schema.go b/schema/mongo_schema.go index be4467f..b602b22 100644 --- a/schema/mongo_schema.go +++ b/schema/mongo_schema.go @@ -389,10 +389,14 @@ func BSONSchemaValidator(schema Schema, globalCase Case) (bson.M, error) { jsonSchema := bson.M{ "bsonType": "object", "properties": properties, - "required": requiredFields, "additionalProperties": schema.AdditionalProperties, } + // MongoDB doesn't allow empty required arrays + if len(requiredFields) > 0 { + jsonSchema["required"] = requiredFields + } + if len(dependents) > 0 { jsonSchema["dependencies"] = dependents } diff --git a/schema/mongo_schema_test.go b/schema/mongo_schema_test.go index 7eef047..641eb5c 100644 --- a/schema/mongo_schema_test.go +++ b/schema/mongo_schema_test.go @@ -78,7 +78,6 @@ func TestBsonSchemaValidator(t *testing.T) { "enum": []string{"us-east-1", "us-west-2"}, }, }, - "required": []string{}, "additionalProperties": false, }, }, @@ -105,7 +104,6 @@ func TestBsonSchemaValidator(t *testing.T) { "pattern": regexLowerCase, }, }, - "required": []string{}, "additionalProperties": false, }, }, @@ -131,7 +129,6 @@ func TestBsonSchemaValidator(t *testing.T) { "bsonType": "bool", }, }, - "required": []string{}, "additionalProperties": false, }, }, @@ -154,7 +151,6 @@ func TestBsonSchemaValidator(t *testing.T) { "pattern": regexLowerCase, }, }, - "required": []string{}, "additionalProperties": true, }, }, @@ -201,7 +197,6 @@ func TestBsonSchemaValidator(t *testing.T) { "bsonType": "long", }, }, - "required": []string{}, "additionalProperties": 
true, }, }, @@ -227,7 +222,6 @@ func TestBsonSchemaValidator(t *testing.T) { "pattern": regexRFC3339, }, }, - "required": []string{}, "additionalProperties": true, }, }, @@ -253,7 +247,6 @@ func TestBsonSchemaValidator(t *testing.T) { "pattern": regexInt64, }, }, - "required": []string{}, "additionalProperties": true, }, }, @@ -279,7 +272,6 @@ func TestBsonSchemaValidator(t *testing.T) { "enum": []string{"true", "false"}, }, }, - "required": []string{}, "additionalProperties": true, }, }, @@ -307,7 +299,6 @@ func TestBsonSchemaValidator(t *testing.T) { "dependencies": map[string][]string{ "cluster_endpoint": {"shard", "slots"}, }, - "required": []string{}, "additionalProperties": true, }, }, @@ -344,7 +335,6 @@ func TestBsonSchemaValidator(t *testing.T) { "cluster_endpoint": {"shard"}, "shard": {"cluster_endpoint"}, }, - "required": []string{}, "additionalProperties": true, }, }, @@ -360,7 +350,6 @@ func TestBsonSchemaValidator(t *testing.T) { "$jsonSchema": bson.M{ "bsonType": "object", "properties": bson.M{}, - "required": []string{}, "additionalProperties": true, }, }, From 5d541145a1e4cb2f78a1d57e24fea68e348eb04c Mon Sep 17 00:00:00 2001 From: Jemiah Westerman Date: Tue, 11 Nov 2025 10:54:33 -0800 Subject: [PATCH 4/6] Spelling corrections Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- schema/README.md | 2 +- schema/config.go | 2 +- server/server.go | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/schema/README.md b/schema/README.md index 014306f..c185019 100644 --- a/schema/README.md +++ b/schema/README.md @@ -135,7 +135,7 @@ validations: … ``` ## Performance Implications -Schema validation impacts write operations in terms of resource consumption and latency, but is optimized within MongoDB. Benchmark new validations to ensure they meet performance criteria.. +Schema validation impacts write operations in terms of resource consumption and latency, but is optimized within MongoDB. 
Benchmark new validations to ensure they meet performance criteria.
 # Error Handling
 Currently, InsertEntities and UpdateEntities in OSS Etre handle each entity separately, stopping on the first error encountered. Until batch processing is improved, handle discrete error scenarios within your application logic
 # Adding and Deleting Indexes
diff --git a/schema/config.go b/schema/config.go
index e91dfc3..a8cf218 100644
--- a/schema/config.go
+++ b/schema/config.go
@@ -21,7 +21,7 @@ type EntitySchema struct {
 }
 
 // Schema represents the basic schema for an Entity.
-// This includes JSON schema validation for entitie fields as well as database index definitions.
+// This includes JSON schema validation for entity fields as well as database index definitions.
 type Schema struct {
 	Fields               []Field `yaml:"fields"`
 	AdditionalProperties bool    `yaml:"additional_properties"`
diff --git a/server/server.go b/server/server.go
index b6aaada..423b7cb 100644
--- a/server/server.go
+++ b/server/server.go
@@ -253,9 +253,9 @@ func (s *Server) connectToDatasource(ds config.DatasourceConfig, client *mongo.C
 
 func (s *Server) runSchemaDDL() error {
 	// We need to retry because the collMod calls that is required to update the schema may error
-	// if there is simulaneous writes to the collection. This is a known behavior with MongoDB.
+	// if there are simultaneous writes to the collection. This is a known behavior with MongoDB.
 	// However it's safe to retry because 1) the schema is idempotent and 2) the update is very fast
-	// since it's just updating metadata 3) index updates also idempotent and fast since DocumentDB
+	// since it's just updating metadata 3) index updates are also idempotent and fast since DocumentDB
 	// defaults all index builds to background as of v5.0.
 	db := s.mainDbClient.Database(s.appCtx.Config.Datasource.Database)
 	var err error

From 9f7f160eac6f8be8ad633c6f4bfe1041b7ab507b Mon Sep 17 00:00:00 2001
From: Jemiah Westerman
Date: Tue, 11 Nov 2025 10:55:48 -0800
Subject: [PATCH 5/6] Fix readme

---
 schema/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/schema/README.md b/schema/README.md
index c185019..4550590 100644
--- a/schema/README.md
+++ b/schema/README.md
@@ -34,7 +34,7 @@ For full context of how we arrived at this approach, see the [Schema Validation
 **Schema**: Specify global case in `validations` -> `config` -> `schema`.
 ### Example
 ```yaml
-validations:
+schemas:
   entities:
     elasticache:
       schema:

From 473803e0f80e5a670c110d7e3ae522be2cd0e4dd Mon Sep 17 00:00:00 2001
From: Jemiah Westerman
Date: Tue, 11 Nov 2025 14:21:25 -0800
Subject: [PATCH 6/6] More spelling corrections

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 schema/mongo_schema.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/schema/mongo_schema.go b/schema/mongo_schema.go
index b602b22..f1080f4 100644
--- a/schema/mongo_schema.go
+++ b/schema/mongo_schema.go
@@ -41,8 +41,8 @@ var (
 
 // CreateOrUpdateMongoSchema creates or updates the MongoDB schema for the given entity. If the schema is nil or has
 // empty fields, it removes the JSON schema validation. If the schema is not nil, it ensures that the indexes in the
-// schem exists, and any indexes that are not in the schema are removed. Entity Collection creation is handled by the
-// index creation process. We assume that any reasonaby designed schema should not solely rely on full collection scans.
+// schema exist, and any indexes that are not in the schema are removed. Entity Collection creation is handled by the
+// index creation process. We assume that any reasonably designed schema should not solely rely on full collection scans.
func CreateOrUpdateMongoSchema(ctx context.Context, db *mongo.Database, config Config) error { log.Printf("INFO: walking through entity validations") for entity, validations := range config.Entities { @@ -255,7 +255,7 @@ func existingIndexes(ctx context.Context, coll *mongo.Collection) ([]string, err return nil, errors.Wrap(err, "Failed to decode index description") } - log.Printf("INFO: Current exising index for %s: %+v", coll.Name(), index) + log.Printf("INFO: Current existing index for %s: %+v", coll.Name(), index) name, ok := index["name"].(string) if !ok { return nil, fmt.Errorf("Failed to get index name for index from entity %s: %+v", coll.Name(), index)