From 95901e03e0be6327eceff82a695dedc730104eda Mon Sep 17 00:00:00 2001 From: Guillaume ERETEO Date: Fri, 13 Feb 2026 13:28:45 +0100 Subject: [PATCH 1/7] Add source_type/target_type fields to relationships for efficient filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem Bundle export API filters relationships by entity type using wildcard queries on source_ref/target_ref URIs (e.g., `source_ref:*malware*`). This is inefficient as it doesn't use the ES inverted index. ## Solution - Add explicit `source_type` and `target_type` fields to relationship docs - Extract type from URI on write (e.g., `malware` from `.../ctia/malware/...`) - Query uses backward-compatible OR clause during migration: `((source_type:malware) OR (source_ref:*malware*))` ## Migration Strategy (Zero-Downtime) 1. Deploy this PR → new docs get fast path, old docs use wildcard fallback 2. Run ES update_by_query to populate fields on existing docs 3. Remove wildcard fallback in follow-up PR ## Files - src/ctia/entity/relationship/es_store.clj - New ES store with mapping & transforms - src/ctia/bundle/core.clj - Backward-compatible query generation - src/ctia/entity/relationship.clj - Use new es-store - src/ctia/task/check_es_stores.clj - Update schema reference Co-Authored-By: Claude Opus 4.5 --- src/ctia/bundle/core.clj | 35 +++++++---- src/ctia/entity/relationship.clj | 28 ++------- src/ctia/entity/relationship/es_store.clj | 72 +++++++++++++++++++++++ src/ctia/task/check_es_stores.clj | 7 ++- test/ctia/bundle/core_test.clj | 15 ++--- 5 files changed, 112 insertions(+), 45 deletions(-) create mode 100644 src/ctia/entity/relationship/es_store.clj diff --git a/src/ctia/bundle/core.clj b/src/ctia/bundle/core.clj index fafd8c1004..f3610926e6 100644 --- a/src/ctia/bundle/core.clj +++ b/src/ctia/bundle/core.clj @@ -410,12 +410,23 @@ :metric (- (System/currentTimeMillis) start)}) (clean-bundle fetched))) -(defn node-filters [field entity-types] - (->> entity-types - (map name) - (map #(format "%s:*%s*" field %)) - (clojure.string/join " OR ") - (format "(%s)"))) +(defn node-filters + "Generate ES query filters for relationship type filtering. + During migration, generates backward-compatible OR clause: + - new-field: exact match on source_type/target_type (fast, for new docs) + - old-field: wildcard match on source_ref/target_ref (slow, for legacy docs) + Example: ((source_type:malware) OR (source_ref:*malware*))" + [new-field old-field entity-types] + (let [type-names (map name entity-types) + new-field-query (->> type-names + (map #(format "%s:%s" new-field %)) + (clojure.string/join " OR ") + (format "(%s)")) + old-field-query (->> type-names + (map #(format "%s:*%s*" old-field %)) + (clojure.string/join " OR ") + (format "(%s)"))] + (format "(%s OR %s)" new-field-query old-field-query))) (defn relationships-filters [id @@ -425,13 +436,13 @@ :or {related_to #{:source_ref :target_ref}}}] (let [edge-filters (->> (map #(hash-map % id) (set related_to)) (apply merge)) - node-filters (cond->> [] - (seq source_type) (cons (node-filters "source_ref" source_type)) - (seq target_type) (cons (node-filters "target_ref" target_type)) - :always (string/join " AND "))] + node-filter-clauses (cond->> [] + (seq source_type) (cons (node-filters "source_type" "source_ref" source_type)) + (seq target_type) (cons (node-filters "target_type" "target_ref" target_type)) + :always (string/join " AND "))] (into {:one-of edge-filters} - (when (seq node-filters) - {:query node-filters})))) + (when (seq node-filter-clauses) + {:query node-filter-clauses})))) (s/defn fetch-entity-relationships "given an entity id, fetch all related relationship" diff --git a/src/ctia/entity/relationship.clj b/src/ctia/entity/relationship.clj index 9c692bf2ab..b779455d3c 100644 --- a/src/ctia/entity/relationship.clj +++ b/src/ctia/entity/relationship.clj @@ -3,6 +3,7 @@ [clojure.string :as str] [ctia.domain.entities :refer [long-id->id short-id->long-id with-long-id]] [ctia.entity.relationship.schemas :as rs] + [ctia.entity.relationship.es-store :as es-store] [ctia.flows.crud :as flows] [ctia.http.middleware.auth :refer [require-capability!]] [ctia.http.routes.common :as routes.common] @@ -11,30 +12,11 @@ [ctia.schemas.core :refer [APIHandlerServices Reference TLP]] [ctia.schemas.sorting :as sorting] [ctia.store :refer [create-record read-record]] - [ctia.stores.es.mapping :as em] - [ctia.stores.es.store :refer [def-es-store]] [ring.swagger.json-schema :refer [describe]] [ring.util.http-response :refer [not-found bad-request bad-request!]] [schema-tools.core :as st] - [schema.core :as s])) - -(def relationship-mapping - {"relationship" - {:dynamic false - :properties - (merge - em/base-entity-mapping - em/describable-entity-mapping - em/sourcable-entity-mapping - em/stored-entity-mapping - {:relationship_type em/token - :source_ref em/token - :target_ref em/token})}}) - -(def-es-store RelationshipStore - :relationship - rs/StoredRelationship - rs/PartialStoredRelationship) + [schema.core :as s] + [ctia.entity.event.schemas :as es])) (def relationship-fields (concat sorting/default-entity-sort-fields @@ -254,8 +236,8 @@ :stored-schema rs/StoredRelationship :partial-stored-schema rs/PartialStoredRelationship :realize-fn rs/realize-relationship - :es-store ->RelationshipStore - :es-mapping relationship-mapping + :es-store es-store/->RelationshipStore + :es-mapping es-store/relationship-mapping :services->routes (routes.common/reloadable-function relationship-routes) :capabilities capabilities :fields relationship-fields diff --git a/src/ctia/entity/relationship/es_store.clj b/src/ctia/entity/relationship/es_store.clj new file mode 100644 index 0000000000..6e587ce38f --- /dev/null +++ b/src/ctia/entity/relationship/es_store.clj @@ -0,0 +1,72 @@ +(ns ctia.entity.relationship.es-store + (:require [ctia.entity.relationship.schemas :as rs] + [ctia.domain.entities :refer [long-id->id]] + [ctia.lib.pagination :refer [list-response-schema]] + [ctia.stores.es.mapping :as em] + [ctia.stores.es.store :refer [def-es-store StoreOpts]] + [schema-tools.core :as st] + [schema.core :as s])) + +(def relationship-mapping + {"relationship" + {:dynamic false + :properties + (merge + em/base-entity-mapping + em/describable-entity-mapping + em/sourcable-entity-mapping + em/stored-entity-mapping + {:relationship_type em/token + :source_ref em/token + :target_ref em/token + :source_type em/token + :target_type em/token})}}) + +(s/defschema ESStoredRelationship + (st/merge rs/StoredRelationship + (st/optional-keys + {:source_type s/Str + :target_type s/Str}))) + +(s/defschema ESPartialStoredRelationship + (st/merge rs/PartialStoredRelationship + (st/optional-keys + {:source_type s/Str + :target_type s/Str}))) + +(def ESPartialStoredRelationshipList (list-response-schema ESPartialStoredRelationship)) +(def PartialStoredRelationshipList (list-response-schema rs/PartialStoredRelationship)) + +(s/defn stored-relationship->es-stored-relationship + :- ESStoredRelationship + "adds source and target types to a relationship" + [{:keys [source_ref target_ref] :as r} :- rs/StoredRelationship] + (assoc r + :source_type (:type (long-id->id source_ref)) + :target_type (:type (long-id->id target_ref)))) + +(s/defn es-stored-relationship->stored-relationship + :- ESStoredRelationship + "dissoc source and target types to a relationship" + [{:keys [source_ref target_ref] :as r} :- ESStoredRelationship] + (dissoc r :source_type :target_type)) + +(s/defn es-partial-stored-relationship->partial-stored-relationship + :- rs/PartialStoredRelationship + "dissoc source and target types to a relationship" + [r :- ESPartialStoredRelationship] + (dissoc r :source_type :target_type)) + +(s/def store-opts :- StoreOpts + {:stored->es-stored (comp stored-relationship->es-stored-relationship :doc) + :es-stored->stored (comp es-stored-relationship->stored-relationship :doc) + :es-partial-stored->partial-stored (comp es-partial-stored-relationship->partial-stored-relationship :doc) + :es-stored-schema ESStoredRelationship + :es-partial-stored-schema ESPartialStoredRelationship}) + +(def-es-store RelationshipStore + :relationship + rs/StoredRelationship + rs/PartialStoredRelationship + :store-opts store-opts + ) diff --git a/src/ctia/task/check_es_stores.clj b/src/ctia/task/check_es_stores.clj index 1b9cc71db8..5b3da6fbc4 100644 --- a/src/ctia/task/check_es_stores.clj +++ b/src/ctia/task/check_es_stores.clj @@ -8,7 +8,8 @@ [ctia.properties :as p] [ctia.store-service :as store-svc] [ctia.entity.entities :as entities] - [ctia.entity.sighting.schemas :refer [StoredSighting]] + [ctia.entity.sighting.es-store :refer [ESStoredSighting]] + [ctia.entity.relationship.es-store :refer [ESStoredRelationship]] [ctia.stores.es.crud :refer [coerce-to-fn]] [ctia.store-service.schemas :refer [AllStoresFn]] [puppetlabs.trapperkeeper.app :as app] @@ -23,8 +24,8 @@ (assoc (into {} (map (fn [[_ {:keys [entity stored-schema]}]] {entity stored-schema}) (entities/all-entities))) - :sighting (st/merge StoredSighting - {(s/optional-key :observables_hash) s/Any}))) + :relationship ESStoredRelationship + :sighting ESStoredSighting)) (defn type->schema [entity-type] (if-let [schema (get all-types entity-type)] diff --git a/test/ctia/bundle/core_test.clj b/test/ctia/bundle/core_test.clj index e2e687e0a4..7f541bc873 100644 --- a/test/ctia/bundle/core_test.clj +++ b/test/ctia/bundle/core_test.clj @@ -38,23 +38,24 @@ :target_ref "id"} (:one-of (sut/relationships-filters "id" {:related_to [:source_ref :target_ref]}))))) - (testing "relationships-filters should properly add query filters" - (is (= "(source_ref:*malware*)" + (testing "relationships-filters should properly add query filters with backward-compatible OR" + ;; During migration, queries include both new field (exact) and old field (wildcard) + (is (= "((source_type:malware) OR (source_ref:*malware*))" (:query (sut/relationships-filters "id" {:source_type [:malware]})))) - (is (= "(target_ref:*sighting*)" + (is (= "((target_type:sighting) OR (target_ref:*sighting*))" (:query (sut/relationships-filters "id" {:target_type [:sighting]})))) - (is (= "(target_ref:*sighting*) AND (source_ref:*malware*)" + (is (= "((target_type:sighting) OR (target_ref:*sighting*)) AND ((source_type:malware) OR (source_ref:*malware*))" (:query (sut/relationships-filters "id" {:source_type [:malware] :target_type [:sighting]})))) - (is (= "(source_ref:*malware* OR source_ref:*vulnerability*)" + (is (= "((source_type:malware OR source_type:vulnerability) OR (source_ref:*malware* OR source_ref:*vulnerability*))" (:query (sut/relationships-filters "id" {:source_type [:malware :vulnerability]})))) - (is (= "(target_ref:*sighting* OR target_ref:*incident*)" + (is (= "((target_type:sighting OR target_type:incident) OR (target_ref:*sighting* OR target_ref:*incident*))" (:query (sut/relationships-filters "id" {:target_type [:sighting :incident]}))))) (testing "relationships-filters should return proper fields and combine filters" (is (= {:one-of {:source_ref "id"} - :query "(source_ref:*malware*)"} + :query "((source_type:malware) OR (source_ref:*malware*))"} (sut/relationships-filters "id" {:source_type [:malware] :related_to [:source_ref]}))))) From e158c27641319ca00d300d31dffcd9f3179130f4 Mon Sep 17 00:00:00 2001 From: Guillaume ERETEO Date: Fri, 13 Feb 2026 13:53:29 +0100 Subject: [PATCH 2/7] Add ES _update_by_query script for relationship type migration (iroh-gr6) Adds a bash script that uses Elasticsearch's _update_by_query API with a Painless script to backfill source_type and target_type fields on existing relationship documents. Usage: # Dry run ./scripts/migrate_relationship_types.sh localhost:9200 ctia_relationship --dry-run # Execute ./scripts/migrate_relationship_types.sh localhost:9200 ctia_relationship The script extracts entity type from source_ref/target_ref URLs: http://example.com/ctia/malware/malware-123 -> source_type: "malware" Co-Authored-By: Claude Opus 4.5 --- scripts/migrate_relationship_types.sh | 148 ++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100755 scripts/migrate_relationship_types.sh diff --git a/scripts/migrate_relationship_types.sh b/scripts/migrate_relationship_types.sh new file mode 100755 index 0000000000..4302a873c1 --- /dev/null +++ b/scripts/migrate_relationship_types.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# +# Migration script to backfill source_type and target_type fields on relationship documents. +# Uses Elasticsearch _update_by_query with Painless script. +# +# Usage: +# ./migrate_relationship_types.sh [--dry-run] +# +# Examples: +# # Dry run (shows how many docs would be updated) +# ./migrate_relationship_types.sh localhost:9200 ctia_relationship --dry-run +# +# # Execute migration +# ./migrate_relationship_types.sh localhost:9200 ctia_relationship +# +# # With authentication +# ES_USER=elastic ES_PASS=changeme ./migrate_relationship_types.sh localhost:9200 ctia_relationship +# + +set -euo pipefail + +ES_HOST="${1:-localhost:9200}" +INDEX_NAME="${2:-ctia_relationship}" +DRY_RUN="${3:-}" + +# Build auth header if credentials provided +AUTH_OPTS="" +if [[ -n "${ES_USER:-}" && -n "${ES_PASS:-}" ]]; then + AUTH_OPTS="-u ${ES_USER}:${ES_PASS}" +fi + +# Painless script to extract entity type from CTIM reference URLs +# Format: http://host/ctia//- +# Example: http://example.com/ctia/malware/malware-123 -> "malware" +PAINLESS_SCRIPT=' +String extractType(String ref) { + if (ref == null || ref.isEmpty()) { + return null; + } + // Find /ctia/ in the path + int ctiaIdx = ref.indexOf("/ctia/"); + if (ctiaIdx == -1) { + return null; + } + // Extract the path after /ctia/ + String path = ref.substring(ctiaIdx + 6); + // Get the entity type (first path segment) + int slashIdx = path.indexOf("/"); + if (slashIdx == -1) { + return null; + } + return path.substring(0, slashIdx); +} + +// Extract types from refs +String sourceType = extractType(ctx._source.source_ref); +String targetType = extractType(ctx._source.target_ref); + +// Only update if we successfully extracted types +if (sourceType != null) { + ctx._source.source_type = sourceType; +} +if (targetType != null) { + ctx._source.target_type = targetType; +} +' + +# Query for documents missing source_type OR target_type +QUERY='{ + "query": { + "bool": { + "must": [ + { "term": { "type": "relationship" } } + ], + "should": [ + { "bool": { "must_not": { "exists": { "field": "source_type" } } } }, + { "bool": { "must_not": { "exists": { "field": "target_type" } } } } + ], + "minimum_should_match": 1 + } + } +}' + +echo "=== Relationship Type Migration ===" +echo "ES Host: ${ES_HOST}" +echo "Index: ${INDEX_NAME}" +echo "" + +# Count documents to be updated +echo "Counting documents to migrate..." +COUNT_RESPONSE=$(curl -s ${AUTH_OPTS} -X GET "http://${ES_HOST}/${INDEX_NAME}/_count" \ + -H 'Content-Type: application/json' \ + -d "${QUERY}") + +DOC_COUNT=$(echo "${COUNT_RESPONSE}" | grep -o '"count":[0-9]*' | grep -o '[0-9]*' || echo "0") +echo "Documents to update: ${DOC_COUNT}" +echo "" + +if [[ "${DOC_COUNT}" == "0" ]]; then + echo "No documents need migration. Done." + exit 0 +fi + +if [[ "${DRY_RUN}" == "--dry-run" ]]; then + echo "[DRY RUN] Would update ${DOC_COUNT} documents." + echo "" + echo "Sample document (first match):" + curl -s ${AUTH_OPTS} -X GET "http://${ES_HOST}/${INDEX_NAME}/_search?size=1" \ + -H 'Content-Type: application/json' \ + -d "${QUERY}" | python3 -m json.tool 2>/dev/null || cat + exit 0 +fi + +echo "Starting migration..." +echo "" + +# Execute _update_by_query +# - wait_for_completion=true: wait for the operation to complete +# - conflicts=proceed: continue even if there are version conflicts +# - scroll_size=1000: process 1000 docs at a time +RESPONSE=$(curl -s ${AUTH_OPTS} -X POST "http://${ES_HOST}/${INDEX_NAME}/_update_by_query?wait_for_completion=true&conflicts=proceed&scroll_size=1000" \ + -H 'Content-Type: application/json' \ + -d "{ + \"query\": ${QUERY#*\"query\": }, + \"script\": { + \"source\": $(echo "${PAINLESS_SCRIPT}" | python3 -c 'import json,sys; print(json.dumps(sys.stdin.read()))'), + \"lang\": \"painless\" + } + }") + +echo "Response:" +echo "${RESPONSE}" | python3 -m json.tool 2>/dev/null || echo "${RESPONSE}" +echo "" + +# Extract stats from response +UPDATED=$(echo "${RESPONSE}" | grep -o '"updated":[0-9]*' | grep -o '[0-9]*' || echo "0") +FAILURES=$(echo "${RESPONSE}" | grep -o '"failures":\[[^]]*\]' || echo "[]") + +echo "=== Migration Complete ===" +echo "Documents updated: ${UPDATED}" + +if [[ "${FAILURES}" != "[]" && "${FAILURES}" != "" ]]; then + echo "WARNING: Some failures occurred:" + echo "${FAILURES}" + exit 1 +fi + +echo "Migration successful!" From 8a93b703ebd0be7810d8c0a4e575837868338f0e Mon Sep 17 00:00:00 2001 From: Guillaume ERETEO Date: Fri, 13 Feb 2026 14:08:00 +0100 Subject: [PATCH 3/7] Add unit tests for relationship es_store transformation functions Tests for: - stored-relationship->es-stored-relationship (extracts source_type/target_type) - es-stored-relationship->stored-relationship (removes type fields) - es-partial-stored-relationship->partial-stored-relationship - store-opts functions with :doc wrapper Co-Authored-By: Claude Opus 4.5 --- .../entity/relationship/es_store_test.clj | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 test/ctia/entity/relationship/es_store_test.clj diff --git a/test/ctia/entity/relationship/es_store_test.clj b/test/ctia/entity/relationship/es_store_test.clj new file mode 100644 index 0000000000..5c4bfbcff1 --- /dev/null +++ b/test/ctia/entity/relationship/es_store_test.clj @@ -0,0 +1,111 @@ +(ns ctia.entity.relationship.es-store-test + (:require [clojure.test :refer [deftest is testing are]] + [ctia.entity.relationship.es-store :as sut] + [schema.test :refer [validate-schemas]] + [clojure.test :refer [use-fixtures]])) + +(use-fixtures :once validate-schemas) + +(def base-relationship + {:id "http://example.com/ctia/relationship/relationship-123" + :type "relationship" + :schema_version "1.1.0" + :relationship_type "related-to" + :source_ref "http://example.com/ctia/malware/malware-456" + :target_ref "http://example.com/ctia/sighting/sighting-789" + :tlp "amber" + :owner "test-user" + :groups ["test-group"] + :created #inst "2024-01-01T00:00:00.000Z" + :modified #inst "2024-01-01T00:00:00.000Z"}) + +(deftest stored-relationship->es-stored-relationship-test + (testing "extracts source_type and target_type from refs" + (let [result (sut/stored-relationship->es-stored-relationship base-relationship)] + (is (= "malware" (:source_type result))) + (is (= "sighting" (:target_type result))) + ;; Original fields preserved + (is (= (:source_ref base-relationship) (:source_ref result))) + (is (= (:target_ref base-relationship) (:target_ref result))))) + + (testing "handles various entity types" + (are [source-type target-type source-ref target-ref] + (let [rel (assoc base-relationship + :source_ref source-ref + :target_ref target-ref) + result (sut/stored-relationship->es-stored-relationship rel)] + (and (= source-type (:source_type result)) + (= target-type (:target_type result)))) + + "indicator" "incident" + "http://example.com/ctia/indicator/indicator-123" + "http://example.com/ctia/incident/incident-456" + + "judgement" "verdict" + "http://example.com/ctia/judgement/judgement-123" + "http://example.com/ctia/verdict/verdict-456" + + "attack-pattern" "vulnerability" + "http://example.com/ctia/attack-pattern/attack-pattern-123" + "http://example.com/ctia/vulnerability/vulnerability-456" + + "casebook" "investigation" + "http://example.com/ctia/casebook/casebook-123" + "http://example.com/ctia/investigation/investigation-456"))) + +(deftest es-stored-relationship->stored-relationship-test + (testing "removes source_type and target_type" + (let [es-relationship (assoc base-relationship + :source_type "malware" + :target_type "sighting") + result (sut/es-stored-relationship->stored-relationship es-relationship)] + (is (nil? (:source_type result))) + (is (nil? (:target_type result))) + ;; Original fields preserved + (is (= (:source_ref base-relationship) (:source_ref result))) + (is (= (:target_ref base-relationship) (:target_ref result))) + (is (= (:relationship_type base-relationship) (:relationship_type result)))))) + +(deftest es-partial-stored-relationship->partial-stored-relationship-test + (testing "removes source_type and target_type from partial" + (let [partial-rel {:id "http://example.com/ctia/relationship/relationship-123" + :source_ref "http://example.com/ctia/malware/malware-456" + :source_type "malware" + :target_type "sighting"} + result (sut/es-partial-stored-relationship->partial-stored-relationship partial-rel)] + (is (nil? (:source_type result))) + (is (nil? (:target_type result))) + (is (= (:source_ref partial-rel) (:source_ref result))))) + + (testing "handles partial without type fields" + (let [partial-rel {:id "http://example.com/ctia/relationship/relationship-123" + :source_ref "http://example.com/ctia/malware/malware-456"} + result (sut/es-partial-stored-relationship->partial-stored-relationship partial-rel)] + (is (nil? (:source_type result))) + (is (nil? (:target_type result)))))) + +(deftest store-opts-test + (testing "stored->es-stored extracts from :doc wrapper" + (let [doc-wrapper {:doc base-relationship} + transform-fn (:stored->es-stored sut/store-opts) + result (transform-fn doc-wrapper)] + (is (= "malware" (:source_type result))) + (is (= "sighting" (:target_type result))))) + + (testing "es-stored->stored extracts from :doc wrapper and removes types" + (let [es-rel (assoc base-relationship + :source_type "malware" + :target_type "sighting") + doc-wrapper {:doc es-rel} + transform-fn (:es-stored->stored sut/store-opts) + result (transform-fn doc-wrapper)] + (is (nil? (:source_type result))) + (is (nil? (:target_type result))))) + + (testing "es-partial-stored->partial-stored extracts from :doc wrapper" + (let [partial-rel {:source_type "malware" :target_type "sighting"} + doc-wrapper {:doc partial-rel} + transform-fn (:es-partial-stored->partial-stored sut/store-opts) + result (transform-fn doc-wrapper)] + (is (nil? (:source_type result))) + (is (nil? (:target_type result)))))) From c4f8b288950492dea291e938c205c8dee0022073 Mon Sep 17 00:00:00 2001 From: Guillaume ERETEO Date: Fri, 13 Feb 2026 14:08:36 +0100 Subject: [PATCH 4/7] Add throttling and async options to ES migration script New options: - --throttle : Limit to N documents per second using requests_per_second - --async: Run in background with wait_for_completion=false, returns task ID Recommended for production: ./scripts/migrate_relationship_types.sh host:9200 index --throttle 500 --async Also includes: - Estimated time calculation in dry-run mode - Task monitoring and cancellation commands for async mode Co-Authored-By: Claude Opus 4.5 --- scripts/migrate_relationship_types.sh | 132 +++++++++++++++++++++----- 1 file changed, 110 insertions(+), 22 deletions(-) diff --git a/scripts/migrate_relationship_types.sh b/scripts/migrate_relationship_types.sh index 4302a873c1..b5e13b9039 100755 --- a/scripts/migrate_relationship_types.sh +++ b/scripts/migrate_relationship_types.sh @@ -4,24 +4,74 @@ # Uses Elasticsearch _update_by_query with Painless script. # # Usage: -# ./migrate_relationship_types.sh [--dry-run] +# ./migrate_relationship_types.sh [OPTIONS] +# +# Options: +# --dry-run Show count and sample doc without making changes +# --throttle Limit to N documents per second (default: unlimited) +# --async Run in background, return task ID for monitoring # # Examples: # # Dry run (shows how many docs would be updated) # ./migrate_relationship_types.sh localhost:9200 ctia_relationship --dry-run # -# # Execute migration +# # Execute migration (full speed) # ./migrate_relationship_types.sh localhost:9200 ctia_relationship # +# # Throttled execution (1000 docs/sec) - safer for production +# ./migrate_relationship_types.sh localhost:9200 ctia_relationship --throttle 1000 +# +# # Async execution (returns task ID to monitor) +# ./migrate_relationship_types.sh localhost:9200 ctia_relationship --async +# +# # Throttled + async (recommended for large production indices) +# ./migrate_relationship_types.sh localhost:9200 ctia_relationship --throttle 500 --async +# # # With authentication # ES_USER=elastic ES_PASS=changeme ./migrate_relationship_types.sh localhost:9200 ctia_relationship # +# Monitoring async tasks: +# # Check task status +# curl -X GET "http://localhost:9200/_tasks/" +# +# # List all update_by_query tasks +# curl -X GET "http://localhost:9200/_tasks?actions=*update_by_query&detailed" +# +# # Cancel a running task +# curl -X POST "http://localhost:9200/_tasks//_cancel" +# set -euo pipefail +# Parse arguments ES_HOST="${1:-localhost:9200}" INDEX_NAME="${2:-ctia_relationship}" -DRY_RUN="${3:-}" +shift 2 || true + +DRY_RUN=false +THROTTLE="" +ASYNC=false + +while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) + DRY_RUN=true + shift + ;; + --throttle) + THROTTLE="$2" + shift 2 + ;; + --async) + ASYNC=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done # Build auth header if credentials provided AUTH_OPTS="" @@ -84,6 +134,8 @@ QUERY='{ echo "=== Relationship Type Migration ===" echo "ES Host: ${ES_HOST}" echo "Index: ${INDEX_NAME}" +echo "Throttle: ${THROTTLE:-unlimited}" +echo "Async: ${ASYNC}" echo "" # Count documents to be updated @@ -101,8 +153,12 @@ if [[ "${DOC_COUNT}" == "0" ]]; then exit 0 fi -if [[ "${DRY_RUN}" == "--dry-run" ]]; then +if [[ "${DRY_RUN}" == "true" ]]; then echo "[DRY RUN] Would update ${DOC_COUNT} documents." + if [[ -n "${THROTTLE}" ]]; then + ESTIMATED_TIME=$((DOC_COUNT / THROTTLE)) + echo "Estimated time with throttle: ~${ESTIMATED_TIME} seconds" + fi echo "" echo "Sample document (first match):" curl -s ${AUTH_OPTS} -X GET "http://${ES_HOST}/${INDEX_NAME}/_search?size=1" \ @@ -114,11 +170,21 @@ fi echo "Starting migration..." echo "" +# Build query parameters +QUERY_PARAMS="conflicts=proceed&scroll_size=1000" + +if [[ "${ASYNC}" == "true" ]]; then + QUERY_PARAMS="${QUERY_PARAMS}&wait_for_completion=false" +else + QUERY_PARAMS="${QUERY_PARAMS}&wait_for_completion=true" +fi + +if [[ -n "${THROTTLE}" ]]; then + QUERY_PARAMS="${QUERY_PARAMS}&requests_per_second=${THROTTLE}" +fi + # Execute _update_by_query -# - wait_for_completion=true: wait for the operation to complete -# - conflicts=proceed: continue even if there are version conflicts -# - scroll_size=1000: process 1000 docs at a time -RESPONSE=$(curl -s ${AUTH_OPTS} -X POST "http://${ES_HOST}/${INDEX_NAME}/_update_by_query?wait_for_completion=true&conflicts=proceed&scroll_size=1000" \ +RESPONSE=$(curl -s ${AUTH_OPTS} -X POST "http://${ES_HOST}/${INDEX_NAME}/_update_by_query?${QUERY_PARAMS}" \ -H 'Content-Type: application/json' \ -d "{ \"query\": ${QUERY#*\"query\": }, @@ -128,21 +194,43 @@ RESPONSE=$(curl -s ${AUTH_OPTS} -X POST "http://${ES_HOST}/${INDEX_NAME}/_update } }") -echo "Response:" -echo "${RESPONSE}" | python3 -m json.tool 2>/dev/null || echo "${RESPONSE}" -echo "" +if [[ "${ASYNC}" == "true" ]]; then + # Extract task ID from async response + TASK_ID=$(echo "${RESPONSE}" | grep -o '"task":"[^"]*"' | cut -d'"' -f4 || echo "") + + if [[ -n "${TASK_ID}" ]]; then + echo "=== Migration Started (Async) ===" + echo "Task ID: ${TASK_ID}" + echo "" + echo "Monitor progress:" + echo " curl -X GET \"http://${ES_HOST}/_tasks/${TASK_ID}\"" + echo "" + echo "Cancel if needed:" + echo " curl -X POST \"http://${ES_HOST}/_tasks/${TASK_ID}/_cancel\"" + echo "" + echo "The migration is running in the background." + else + echo "ERROR: Failed to start async task" + echo "${RESPONSE}" | python3 -m json.tool 2>/dev/null || echo "${RESPONSE}" + exit 1 + fi +else + echo "Response:" + echo "${RESPONSE}" | python3 -m json.tool 2>/dev/null || echo "${RESPONSE}" + echo "" -# Extract stats from response -UPDATED=$(echo "${RESPONSE}" | grep -o '"updated":[0-9]*' | grep -o '[0-9]*' || echo "0") -FAILURES=$(echo "${RESPONSE}" | grep -o '"failures":\[[^]]*\]' || echo "[]") + # Extract stats from response + UPDATED=$(echo "${RESPONSE}" | grep -o '"updated":[0-9]*' | grep -o '[0-9]*' || echo "0") + FAILURES=$(echo "${RESPONSE}" | grep -o '"failures":\[[^]]*\]' || echo "[]") -echo "=== Migration Complete ===" -echo "Documents updated: ${UPDATED}" + echo "=== Migration Complete ===" + echo "Documents updated: ${UPDATED}" -if [[ "${FAILURES}" != "[]" && "${FAILURES}" != "" ]]; then - echo "WARNING: Some failures occurred:" - echo "${FAILURES}" - exit 1 -fi + if [[ "${FAILURES}" != "[]" && "${FAILURES}" != "" ]]; then + echo "WARNING: Some failures occurred:" + echo "${FAILURES}" + exit 1 + fi -echo "Migration successful!" + echo "Migration successful!" +fi From 051efcfdad3b7de35357a44793e2da153e675371 Mon Sep 17 00:00:00 2001 From: Guillaume ERETEO Date: Fri, 13 Feb 2026 15:10:11 +0100 Subject: [PATCH 5/7] Fix migration script shell escaping issues - Use heredoc with temp file to avoid shell escaping issues with Painless script - The forward slashes in '/ctia/' were being misinterpreted - Tested and verified working against OpenSearch 2.19 Co-Authored-By: Claude Opus 4.5 --- scripts/migrate_relationship_types.sh | 145 ++++++++++++++------------ 1 file changed, 78 insertions(+), 67 deletions(-) diff --git a/scripts/migrate_relationship_types.sh b/scripts/migrate_relationship_types.sh index b5e13b9039..a67f8e5f32 100755 --- a/scripts/migrate_relationship_types.sh +++ b/scripts/migrate_relationship_types.sh @@ -79,58 +79,6 @@ if [[ -n "${ES_USER:-}" && -n "${ES_PASS:-}" ]]; then AUTH_OPTS="-u ${ES_USER}:${ES_PASS}" fi -# Painless script to extract entity type from CTIM reference URLs -# Format: http://host/ctia//- -# Example: http://example.com/ctia/malware/malware-123 -> "malware" -PAINLESS_SCRIPT=' -String extractType(String ref) { - if (ref == null || ref.isEmpty()) { - return null; - } - // Find /ctia/ in the path - int ctiaIdx = ref.indexOf("/ctia/"); - if (ctiaIdx == -1) { - return null; - } - // Extract the path after /ctia/ - String path = ref.substring(ctiaIdx + 6); - // Get the entity type (first path segment) - int slashIdx = path.indexOf("/"); - if (slashIdx == -1) { - return null; - } - return path.substring(0, slashIdx); -} - -// Extract types from refs -String sourceType = extractType(ctx._source.source_ref); -String targetType = extractType(ctx._source.target_ref); - -// Only update if we successfully extracted types -if (sourceType != null) { - ctx._source.source_type = sourceType; -} -if (targetType != null) { - ctx._source.target_type = targetType; -} -' - -# Query for documents missing source_type OR target_type -QUERY='{ - "query": { - "bool": { - "must": [ - { "term": { "type": "relationship" } } - ], - "should": [ - { "bool": { "must_not": { "exists": { "field": "source_type" } } } }, - { "bool": { "must_not": { "exists": { "field": "target_type" } } } } - ], - "minimum_should_match": 1 - } - } -}' - echo "=== Relationship Type Migration ===" echo "ES Host: ${ES_HOST}" echo "Index: ${INDEX_NAME}" @@ -142,7 +90,20 @@ echo "" echo "Counting documents to migrate..." COUNT_RESPONSE=$(curl -s ${AUTH_OPTS} -X GET "http://${ES_HOST}/${INDEX_NAME}/_count" \ -H 'Content-Type: application/json' \ - -d "${QUERY}") + -d '{ + "query": { + "bool": { + "must": [ + { "term": { "type": "relationship" } } + ], + "should": [ + { "bool": { "must_not": { "exists": { "field": "source_type" } } } }, + { "bool": { "must_not": { "exists": { "field": "target_type" } } } } + ], + "minimum_should_match": 1 + } + } + }') DOC_COUNT=$(echo "${COUNT_RESPONSE}" | grep -o '"count":[0-9]*' | grep -o '[0-9]*' || echo "0") echo "Documents to update: ${DOC_COUNT}" @@ -161,9 +122,27 @@ if [[ "${DRY_RUN}" == "true" ]]; then fi echo "" echo "Sample document (first match):" - curl -s ${AUTH_OPTS} -X GET "http://${ES_HOST}/${INDEX_NAME}/_search?size=1" \ + SAMPLE=$(curl -s ${AUTH_OPTS} -X GET "http://${ES_HOST}/${INDEX_NAME}/_search?size=1" \ -H 'Content-Type: application/json' \ - -d "${QUERY}" | python3 -m json.tool 2>/dev/null || cat + -d '{ + "query": { + "bool": { + "must": [ + { "term": { "type": "relationship" } } + ], + "should": [ + { "bool": { "must_not": { "exists": { "field": "source_type" } } } }, + { "bool": { "must_not": { "exists": { "field": "target_type" } } } } + ], + "minimum_should_match": 1 + } + } + }') + if command -v jq &> /dev/null; then + echo "${SAMPLE}" | jq . + else + echo "${SAMPLE}" + fi exit 0 fi @@ -183,16 +162,39 @@ if [[ -n "${THROTTLE}" ]]; then QUERY_PARAMS="${QUERY_PARAMS}&requests_per_second=${THROTTLE}" fi +# Create temp file for request body to avoid shell escaping issues +TMPFILE=$(mktemp) +trap "rm -f ${TMPFILE}" EXIT + +# Write the request body with Painless script +# The script extracts entity type from CTIM reference URLs +# Format: http://host/ctia//- +# Example: http://example.com/ctia/malware/malware-123 -> "malware" +cat > "${TMPFILE}" << 'EOFBODY' +{ + "query": { + "bool": { + "must": [ + { "term": { "type": "relationship" } } + ], + "should": [ + { "bool": { "must_not": { "exists": { "field": "source_type" } } } }, + { "bool": { "must_not": { "exists": { "field": "target_type" } } } } + ], + "minimum_should_match": 1 + } + }, + "script": { + "source": "String sourceRef = ctx._source.source_ref; if (sourceRef != null) { int ctiaIdx = sourceRef.indexOf('/ctia/'); if (ctiaIdx != -1) { String path = sourceRef.substring(ctiaIdx + 6); int slashIdx = path.indexOf('/'); if (slashIdx != -1) { ctx._source.source_type = path.substring(0, slashIdx); } } } String targetRef = ctx._source.target_ref; if (targetRef != null) { int ctiaIdx2 = targetRef.indexOf('/ctia/'); if (ctiaIdx2 != -1) { String path2 = targetRef.substring(ctiaIdx2 + 6); int slashIdx2 = path2.indexOf('/'); if (slashIdx2 != -1) { ctx._source.target_type = path2.substring(0, slashIdx2); } } }", + "lang": "painless" + } +} +EOFBODY + # Execute _update_by_query RESPONSE=$(curl -s ${AUTH_OPTS} -X POST "http://${ES_HOST}/${INDEX_NAME}/_update_by_query?${QUERY_PARAMS}" \ -H 'Content-Type: application/json' \ - -d "{ - \"query\": ${QUERY#*\"query\": }, - \"script\": { - \"source\": $(echo "${PAINLESS_SCRIPT}" | python3 -c 'import json,sys; print(json.dumps(sys.stdin.read()))'), - \"lang\": \"painless\" - } - }") + -d @"${TMPFILE}") if [[ "${ASYNC}" == "true" ]]; then # Extract task ID from async response @@ -211,24 +213,33 @@ if [[ "${ASYNC}" == "true" ]]; then echo "The migration is running in the background." else echo "ERROR: Failed to start async task" - echo "${RESPONSE}" | python3 -m json.tool 2>/dev/null || echo "${RESPONSE}" + if command -v jq &> /dev/null; then + echo "${RESPONSE}" | jq . + else + echo "${RESPONSE}" + fi exit 1 fi else echo "Response:" - echo "${RESPONSE}" | python3 -m json.tool 2>/dev/null || echo "${RESPONSE}" + if command -v jq &> /dev/null; then + echo "${RESPONSE}" | jq . + else + echo "${RESPONSE}" + fi echo "" # Extract stats from response UPDATED=$(echo "${RESPONSE}" | grep -o '"updated":[0-9]*' | grep -o '[0-9]*' || echo "0") - FAILURES=$(echo "${RESPONSE}" | grep -o '"failures":\[[^]]*\]' || echo "[]") + # Check for non-empty failures array (failures with actual content) + HAS_FAILURES=$(echo "${RESPONSE}" | grep -o '"failures":\[[^]]*\]' | grep -v '"failures":\[\]' || echo "") echo "=== Migration Complete ===" echo "Documents updated: ${UPDATED}" - if [[ "${FAILURES}" != "[]" && "${FAILURES}" != "" ]]; then + if [[ -n "${HAS_FAILURES}" ]]; then echo "WARNING: Some failures occurred:" - echo "${FAILURES}" + echo "${HAS_FAILURES}" exit 1 fi From 754a1af49d69ee36d7ae974f5b8442e08fe9c2b9 Mon Sep 17 00:00:00 2001 From: Guillaume ERETEO Date: Fri, 13 Feb 2026 15:20:09 +0100 Subject: [PATCH 6/7] Add migration verification script Adds check_relationship_migration.sh that: - Shows migration statistics (total, migrated, remaining) - Displays progress percentage - Shows breakdown by source_type and target_type - Validates sample documents (ensures extracted types match refs) - Returns exit codes: 0=success, 1=validation error, 2=incomplete Usage: ./scripts/check_relationship_migration.sh Co-Authored-By: Claude Opus 4.5 --- scripts/check_relationship_migration.sh | 182 ++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100755 scripts/check_relationship_migration.sh diff --git a/scripts/check_relationship_migration.sh b/scripts/check_relationship_migration.sh new file mode 100755 index 0000000000..c6e3c78249 --- /dev/null +++ b/scripts/check_relationship_migration.sh @@ -0,0 +1,182 @@ +#!/bin/bash +# +# Script to check the status of relationship type migration. +# Verifies that source_type and target_type fields are properly populated. +# +# Usage: +# ./check_relationship_migration.sh +# +# Examples: +# ./check_relationship_migration.sh localhost:9200 ctia_relationship +# ES_USER=elastic ES_PASS=changeme ./check_relationship_migration.sh prod-es:9200 ctia_relationship +# + +set -euo pipefail + +ES_HOST="${1:-localhost:9200}" +INDEX_NAME="${2:-ctia_relationship}" + +# Build auth header if credentials provided +AUTH_OPTS="" +if [[ -n "${ES_USER:-}" && -n "${ES_PASS:-}" ]]; then + AUTH_OPTS="-u ${ES_USER}:${ES_PASS}" +fi + +echo "=== Relationship Type Migration Check ===" +echo "ES Host: ${ES_HOST}" +echo "Index: ${INDEX_NAME}" +echo "" + +# Total relationship documents +TOTAL=$(curl -s ${AUTH_OPTS} "http://${ES_HOST}/${INDEX_NAME}/_count" \ + -H 'Content-Type: application/json' \ + -d '{"query":{"term":{"type":"relationship"}}}' | grep -o '"count":[0-9]*' | grep -o '[0-9]*' || echo "0") + +# Documents with source_type +WITH_SOURCE_TYPE=$(curl -s ${AUTH_OPTS} "http://${ES_HOST}/${INDEX_NAME}/_count" \ + -H 'Content-Type: application/json' \ + -d '{"query":{"bool":{"must":[{"term":{"type":"relationship"}},{"exists":{"field":"source_type"}}]}}}' | grep -o '"count":[0-9]*' | grep -o '[0-9]*' || echo "0") + +# Documents with target_type +WITH_TARGET_TYPE=$(curl -s ${AUTH_OPTS} "http://${ES_HOST}/${INDEX_NAME}/_count" \ + -H 'Content-Type: application/json' \ + -d '{"query":{"bool":{"must":[{"term":{"type":"relationship"}},{"exists":{"field":"target_type"}}]}}}' | grep -o '"count":[0-9]*' | grep -o '[0-9]*' || echo "0") + +# Documents with BOTH fields +WITH_BOTH=$(curl -s ${AUTH_OPTS} "http://${ES_HOST}/${INDEX_NAME}/_count" \ + -H 'Content-Type: application/json' \ + -d '{"query":{"bool":{"must":[{"term":{"type":"relationship"}},{"exists":{"field":"source_type"}},{"exists":{"field":"target_type"}}]}}}' | grep -o '"count":[0-9]*' | grep -o '[0-9]*' || echo "0") + +# Documents missing EITHER field (need migration) +NEEDS_MIGRATION=$(curl -s ${AUTH_OPTS} "http://${ES_HOST}/${INDEX_NAME}/_count" \ + -H 'Content-Type: application/json' \ + -d '{ + "query": { + "bool": { + "must": [{"term": {"type": "relationship"}}], + "should": [ + {"bool": {"must_not": {"exists": {"field": "source_type"}}}}, + {"bool": {"must_not": {"exists": {"field": "target_type"}}}} + ], + "minimum_should_match": 1 + } + } + }' | grep -o '"count":[0-9]*' | grep -o '[0-9]*' || echo "0") + +echo "=== Migration Statistics ===" +echo "" +echo "Total relationships: ${TOTAL}" +echo "With source_type: ${WITH_SOURCE_TYPE}" +echo "With target_type: ${WITH_TARGET_TYPE}" +echo "With both fields: ${WITH_BOTH}" +echo "Needs migration: ${NEEDS_MIGRATION}" +echo "" + +if [[ "${TOTAL}" -gt 0 ]]; then + PERCENT_COMPLETE=$((WITH_BOTH * 100 / TOTAL)) + echo "Migration progress: ${PERCENT_COMPLETE}%" + echo "" +fi + +if [[ "${NEEDS_MIGRATION}" == "0" ]]; then + echo "✅ Migration complete! All documents have source_type and target_type." +else + echo "⚠️ Migration incomplete. ${NEEDS_MIGRATION} documents still need migration." +fi +echo "" + +# Show breakdown by source_type +echo "=== Breakdown by source_type ===" +curl -s ${AUTH_OPTS} "http://${ES_HOST}/${INDEX_NAME}/_search?size=0" \ + -H 'Content-Type: application/json' \ + -d '{ + "query": {"term": {"type": "relationship"}}, + "aggs": { + "source_types": { + "terms": {"field": "source_type", "size": 50, "missing": "(not set)"} + } + } + }' | jq -r '.aggregations.source_types.buckets[] | "\(.key): \(.doc_count)"' 2>/dev/null || echo "(jq not available for aggregation display)" + +echo "" + +# Show breakdown by target_type +echo "=== Breakdown by target_type ===" +curl -s ${AUTH_OPTS} "http://${ES_HOST}/${INDEX_NAME}/_search?size=0" \ + -H 'Content-Type: application/json' \ + -d '{ + "query": {"term": {"type": "relationship"}}, + "aggs": { + "target_types": { + "terms": {"field": "target_type", "size": 50, "missing": "(not set)"} + } + } + }' | jq -r '.aggregations.target_types.buckets[] | "\(.key): \(.doc_count)"' 2>/dev/null || echo "(jq not available for aggregation display)" + +echo "" + +# Validate a sample of migrated documents +echo "=== Sample Validation (5 random migrated docs) ===" +SAMPLE=$(curl -s ${AUTH_OPTS} "http://${ES_HOST}/${INDEX_NAME}/_search?size=5" \ + -H 'Content-Type: application/json' \ + -d '{ + "query": { + "bool": { + "must": [ + {"term": {"type": "relationship"}}, + {"exists": {"field": "source_type"}}, + {"exists": {"field": "target_type"}} + ] + } + }, + "_source": ["source_ref", "target_ref", "source_type", "target_type"], + "sort": [{"_script": {"type": "number", "script": "Math.random()", "order": "asc"}}] + }') + +# Validate each sample +VALIDATION_ERRORS=0 +if command -v jq &> /dev/null; then + echo "${SAMPLE}" | jq -r '.hits.hits[]._source | "source_ref: \(.source_ref)\nsource_type: \(.source_type)\ntarget_ref: \(.target_ref)\ntarget_type: \(.target_type)\n---"' 2>/dev/null + + # Check if source_type matches what's in source_ref + while IFS= read -r line; do + source_ref=$(echo "$line" | jq -r '.source_ref // empty') + source_type=$(echo "$line" | jq -r '.source_type // empty') + target_ref=$(echo "$line" | jq -r '.target_ref // empty') + target_type=$(echo "$line" | jq -r '.target_type // empty') + + if [[ -n "$source_ref" && -n "$source_type" ]]; then + # Extract expected type from source_ref + expected_source=$(echo "$source_ref" | sed -n 's|.*/ctia/\([^/]*\)/.*|\1|p') + if [[ "$expected_source" != "$source_type" ]]; then + echo "❌ Mismatch: source_type='$source_type' but source_ref suggests '$expected_source'" + VALIDATION_ERRORS=$((VALIDATION_ERRORS + 1)) + fi + fi + + if [[ -n "$target_ref" && -n "$target_type" ]]; then + expected_target=$(echo "$target_ref" | sed -n 's|.*/ctia/\([^/]*\)/.*|\1|p') + if [[ "$expected_target" != "$target_type" ]]; then + echo "❌ Mismatch: target_type='$target_type' but target_ref suggests '$expected_target'" + VALIDATION_ERRORS=$((VALIDATION_ERRORS + 1)) + fi + fi + done < <(echo "${SAMPLE}" | jq -c '.hits.hits[]._source' 2>/dev/null) +else + echo "(jq not available for detailed validation)" +fi + +echo "" +echo "=== Summary ===" +if [[ "${NEEDS_MIGRATION}" == "0" && "${VALIDATION_ERRORS}" == "0" ]]; then + echo "✅ Migration verified successfully!" + echo " - All ${TOTAL} documents have source_type and target_type" + echo " - Sample validation passed" + exit 0 +elif [[ "${NEEDS_MIGRATION}" == "0" ]]; then + echo "⚠️ Migration complete but validation found ${VALIDATION_ERRORS} mismatches" + exit 1 +else + echo "⏳ Migration in progress: ${WITH_BOTH}/${TOTAL} complete (${NEEDS_MIGRATION} remaining)" + exit 2 +fi From 1588797f7a3d0a655191afbea4b1c9dfe2bf5b98 Mon Sep 17 00:00:00 2001 From: Guillaume ERETEO Date: Fri, 13 Feb 2026 16:35:01 +0100 Subject: [PATCH 7/7] fix: handle nil values from long-id->id gracefully - Use cond-> to only assoc source_type/target_type when non-nil - Update test data to use valid CTIA ID format (type-uuid pattern) - Add test case for graceful handling of unparseable refs Co-Authored-By: Claude Opus 4.5 --- src/ctia/entity/relationship/es_store.clj | 8 ++-- .../entity/relationship/es_store_test.clj | 39 ++++++++++++------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/ctia/entity/relationship/es_store.clj b/src/ctia/entity/relationship/es_store.clj index 6e587ce38f..ca580e6be9 100644 --- a/src/ctia/entity/relationship/es_store.clj +++ b/src/ctia/entity/relationship/es_store.clj @@ -41,9 +41,11 @@ :- ESStoredRelationship "adds source and target types to a relationship" [{:keys [source_ref target_ref] :as r} :- rs/StoredRelationship] - (assoc r - :source_type (:type (long-id->id source_ref)) - :target_type (:type (long-id->id target_ref)))) + (let [source-type (:type (long-id->id source_ref)) + target-type (:type (long-id->id target_ref))] + (cond-> r + source-type (assoc :source_type source-type) + target-type (assoc :target_type target-type)))) (s/defn es-stored-relationship->stored-relationship :- ESStoredRelationship diff --git a/test/ctia/entity/relationship/es_store_test.clj b/test/ctia/entity/relationship/es_store_test.clj index 5c4bfbcff1..4a425f73ed 100644 --- a/test/ctia/entity/relationship/es_store_test.clj +++ b/test/ctia/entity/relationship/es_store_test.clj @@ -7,12 +7,12 @@ (use-fixtures :once validate-schemas) (def base-relationship - {:id "http://example.com/ctia/relationship/relationship-123" + {:id "http://localhost:3000/ctia/relationship/relationship-00000000-0000-0000-0000-000000000001" :type "relationship" :schema_version "1.1.0" :relationship_type "related-to" - :source_ref "http://example.com/ctia/malware/malware-456" - :target_ref "http://example.com/ctia/sighting/sighting-789" + :source_ref "http://localhost:3000/ctia/malware/malware-00000000-0000-0000-0000-000000000002" + :target_ref "http://localhost:3000/ctia/sighting/sighting-00000000-0000-0000-0000-000000000003" :tlp "amber" :owner "test-user" :groups ["test-group"] @@ -38,20 +38,29 @@ (= target-type (:target_type result)))) "indicator" "incident" - "http://example.com/ctia/indicator/indicator-123" - "http://example.com/ctia/incident/incident-456" + "http://localhost:3000/ctia/indicator/indicator-00000000-0000-0000-0000-000000000010" + "http://localhost:3000/ctia/incident/incident-00000000-0000-0000-0000-000000000011" "judgement" "verdict" - "http://example.com/ctia/judgement/judgement-123" - "http://example.com/ctia/verdict/verdict-456" + "http://localhost:3000/ctia/judgement/judgement-00000000-0000-0000-0000-000000000012" + "http://localhost:3000/ctia/verdict/verdict-00000000-0000-0000-0000-000000000013" "attack-pattern" "vulnerability" - "http://example.com/ctia/attack-pattern/attack-pattern-123" - "http://example.com/ctia/vulnerability/vulnerability-456" + "http://localhost:3000/ctia/attack-pattern/attack-pattern-00000000-0000-0000-0000-000000000014" + "http://localhost:3000/ctia/vulnerability/vulnerability-00000000-0000-0000-0000-000000000015" "casebook" "investigation" - "http://example.com/ctia/casebook/casebook-123" - "http://example.com/ctia/investigation/investigation-456"))) + "http://localhost:3000/ctia/casebook/casebook-00000000-0000-0000-0000-000000000016" + "http://localhost:3000/ctia/investigation/investigation-00000000-0000-0000-0000-000000000017")) + + (testing "handles unparseable refs gracefully (no nil values added)" + (let [rel (assoc base-relationship + :source_ref "not-a-valid-ctia-url" + :target_ref "also-invalid") + result (sut/stored-relationship->es-stored-relationship rel)] + ;; Should not contain :source_type or :target_type keys at all + (is (not (contains? result :source_type))) + (is (not (contains? result :target_type)))))) (deftest es-stored-relationship->stored-relationship-test (testing "removes source_type and target_type" @@ -68,8 +77,8 @@ (deftest es-partial-stored-relationship->partial-stored-relationship-test (testing "removes source_type and target_type from partial" - (let [partial-rel {:id "http://example.com/ctia/relationship/relationship-123" - :source_ref "http://example.com/ctia/malware/malware-456" + (let [partial-rel {:id "http://localhost:3000/ctia/relationship/relationship-00000000-0000-0000-0000-000000000001" + :source_ref "http://localhost:3000/ctia/malware/malware-00000000-0000-0000-0000-000000000002" :source_type "malware" :target_type "sighting"} result (sut/es-partial-stored-relationship->partial-stored-relationship partial-rel)] @@ -78,8 +87,8 @@ (is (= (:source_ref partial-rel) (:source_ref result))))) (testing "handles partial without type fields" - (let [partial-rel {:id "http://example.com/ctia/relationship/relationship-123" - :source_ref "http://example.com/ctia/malware/malware-456"} + (let [partial-rel {:id "http://localhost:3000/ctia/relationship/relationship-00000000-0000-0000-0000-000000000001" + :source_ref "http://localhost:3000/ctia/malware/malware-00000000-0000-0000-0000-000000000002"} result (sut/es-partial-stored-relationship->partial-stored-relationship partial-rel)] (is (nil? (:source_type result))) (is (nil? (:target_type result))))))