From 19feab3405c788354079ef13fa89b6548f928522 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Thu, 7 Aug 2025 05:07:54 +0000 Subject: [PATCH 01/17] qa/clyso/upgrade: upgrade testing to ces image - wip upgrade testing between different versions Signed-off-by: Joshua Blanch --- qa/suites/orch/cephadm/clyso/upgrade/+ | 0 .../cephadm/clyso/upgrade/1-start-ces.yaml | 45 ++++++ .../clyso/upgrade/2-create-baseline.yaml | 43 +++++ .../clyso/upgrade/3-downgrade-upstream.yaml | 149 ++++++++++++++++++ qa/suites/orch/cephadm/clyso/upgradeMatrix/% | 0 .../cross-distro-pairs/ces-to-upstream.yaml | 9 ++ .../cross-distro-pairs/upstream-to-ces.yaml | 9 ++ .../cephadm/clyso/upgradeMatrix/workflow/+ | 0 .../upgradeMatrix/workflow/1-bootstrap.yaml | 44 ++++++ .../upgradeMatrix/workflow/3-upgrade.yaml | 149 ++++++++++++++++++ .../upgradeMatrix/workflow/5-downgrade.yaml | 149 ++++++++++++++++++ 11 files changed, 597 insertions(+) create mode 100644 qa/suites/orch/cephadm/clyso/upgrade/+ create mode 100644 qa/suites/orch/cephadm/clyso/upgrade/1-start-ces.yaml create mode 100644 qa/suites/orch/cephadm/clyso/upgrade/2-create-baseline.yaml create mode 100644 qa/suites/orch/cephadm/clyso/upgrade/3-downgrade-upstream.yaml create mode 100644 qa/suites/orch/cephadm/clyso/upgradeMatrix/% create mode 100644 qa/suites/orch/cephadm/clyso/upgradeMatrix/cross-distro-pairs/ces-to-upstream.yaml create mode 100644 qa/suites/orch/cephadm/clyso/upgradeMatrix/cross-distro-pairs/upstream-to-ces.yaml create mode 100644 qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/+ create mode 100644 qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/1-bootstrap.yaml create mode 100644 qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/3-upgrade.yaml create mode 100644 qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/5-downgrade.yaml diff --git a/qa/suites/orch/cephadm/clyso/upgrade/+ b/qa/suites/orch/cephadm/clyso/upgrade/+ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/orch/cephadm/clyso/upgrade/1-start-ces.yaml b/qa/suites/orch/cephadm/clyso/upgrade/1-start-ces.yaml new file mode 100644 index 0000000000000..15e2ece1e7071 --- /dev/null +++ b/qa/suites/orch/cephadm/clyso/upgrade/1-start-ces.yaml @@ -0,0 +1,45 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 +- - host.b + - osd.1 + - mgr.b + - client.0 +- - host.b + - osd.2 + - mgr.c + +tasks: +# Install system packages via pexec (avoids CEPH RPM installation) +- pexec: + all: + - sudo dnf install s3cmd curl jq -y + +- cephadm: + +- cephadm.shell: + host.a: + - ceph status + - ceph orch ps + - ceph version + - echo "Starting with CES version (from override YAML)" + +openstack: +- volumes: + count: 4 + size: 20 + +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true + log-only-match: + - CEPHADM_ + log-ignorelist: + - CEPHADM_DAEMON_PLACE_FAIL + - CEPHADM_FAILED_DAEMON + - CEPHADM_STRAY_DAEMON + - CEPHADM_AGENT_DOWN diff --git a/qa/suites/orch/cephadm/clyso/upgrade/2-create-baseline.yaml b/qa/suites/orch/cephadm/clyso/upgrade/2-create-baseline.yaml new file mode 100644 index 0000000000000..fbf225d91b312 --- /dev/null +++ b/qa/suites/orch/cephadm/clyso/upgrade/2-create-baseline.yaml @@ -0,0 +1,43 @@ +tasks: +- cephadm.apply: + specs: + - service_type: rgw + service_id: foo + placement: + host_pattern: "*" + spec: + rgw_frontend_port: 8080 + +# it will be named rgw.foo for some reason +- cephadm.wait_for_service: + service: rgw.foo + +- cephadm.shell: + host.a: + - ceph status + +# - cephadm.shell: +# host.a: +# - ceph_test_rgw_obj + +# 
- workunit: +# clients: +# client.0: +# - rgw/test_rgw_obj.sh + + # client.0: + # - | + # cat > /tmp/s3cfg << 'EOF' + # [default] + # access_key = ceskey + # secret_key = cessecret + # host_base = host.b:8080 + # host_bucket = host.b:8080 + # use_https = False + # signature_v2 = True + # EOF + # - s3cmd -c /tmp/s3cfg mb s3://ces-baseline-bucket + # - echo "CES S3 baseline data - must survive downgrade!" > /tmp/ces-s3-baseline.txt + # - s3cmd -c /tmp/s3cfg put /tmp/ces-s3-baseline.txt s3://ces-baseline-bucket/ces-baseline-s3-object.txt + # - s3cmd -c /tmp/s3cfg ls s3://ces-baseline-bucket/ + # - echo "CES S3 baseline data created successfully" diff --git a/qa/suites/orch/cephadm/clyso/upgrade/3-downgrade-upstream.yaml b/qa/suites/orch/cephadm/clyso/upgrade/3-downgrade-upstream.yaml new file mode 100644 index 0000000000000..5b794a3f6c18c --- /dev/null +++ b/qa/suites/orch/cephadm/clyso/upgrade/3-downgrade-upstream.yaml @@ -0,0 +1,149 @@ +tasks: +- cephadm.shell: + host.a: + - echo "PRE-DOWNGRADE CES VERSION:" + - ceph version + - ceph orch ps + + - echo "Starting downgrade from CES to upstream CEPH v18.2.7..." + - ceph orch upgrade start --image quay.io/ceph/ceph:v18.2.7 + - sleep 30 + + - | + cat > /tmp/upgrade_monitor.sh << 'EOF' + #!/bin/bash + + # Upgrade monitoring script for cephadm upgrade tests + # Monitors upgrade/downgrade completion by checking both upgrade status and daemon versions + + set -e + + TARGET_VERSION="$1" + OPERATION="$2" # "upgrade" or "downgrade" + BASE_IMAGE_NAME="${3:-base}" + TARGET_IMAGE_NAME="${4:-target}" + TIMEOUT="${5:-2400}" + + if [ -z "$TARGET_VERSION" ] || [ -z "$OPERATION" ]; then + echo "Usage: $0 [base_image_name] [target_image_name] [timeout_seconds]" + exit 1 + fi + + echo "=== CEPH Upgrade Monitor Started ===" + echo "Base image: $BASE_IMAGE_NAME" + echo "Target image: $TARGET_IMAGE_NAME" + echo "Operation: $OPERATION to $TARGET_VERSION" + echo "Timeout: ${TIMEOUT}s" + echo "Start time: $(date)" + + echo "=== Capturing Baseline Version ===" + ceph versions + baseline_version=$(ceph versions --format json | jq -r ".overall | keys[0]") + echo "Baseline version: $baseline_version" + + echo "=== Starting Upgrade Monitoring ===" + start_time=$(date +%s) + + while true; do + current_time=$(date +%s) + elapsed=$((current_time - start_time)) + + echo "" + echo "=== Upgrade Status (Elapsed: ${elapsed}s) ===" + echo "Time: $(date)" + + echo "--- Orchestrator Upgrade Status ---" + upgrade_status=$(ceph orch upgrade status --format json) + echo "$upgrade_status" + + echo "--- Daemon Versions ---" + ceph versions + + in_progress=$(echo "$upgrade_status" | jq -r ".in_progress") + version_count=$(ceph versions --format json | jq ".overall | length") + + echo "Upgrade in progress: $in_progress" + echo "Number of different versions running: $version_count" + + if [ "$in_progress" = "false" ] && [ "$version_count" -eq 1 ]; then + current_version=$(ceph versions --format json | jq -r ".overall | keys[0]") + echo "All daemons now on: $current_version" + + if [ "$current_version" != "$baseline_version" ]; then + echo "" + echo "=== SUCCESS: Upgrade Completed ===" + echo "From: $baseline_version" + echo "To: $current_version" + echo "Base image: $BASE_IMAGE_NAME" + echo "Target image: $TARGET_IMAGE_NAME" + echo "Total time: ${elapsed}s" + echo "End time: $(date)" + break + else + echo "" + echo "=== SUCCESS: Already on Target Version ===" + echo "Current version: $current_version" + echo "Base image: $BASE_IMAGE_NAME" + echo "Target image: $TARGET_IMAGE_NAME" + echo 
"Total time: ${elapsed}s" + echo "End time: $(date)" + break + fi + else + echo "Upgrade still in progress or daemons on mixed versions" + if [ "$version_count" -gt 1 ]; then + echo "--- Version Breakdown ---" + ceph versions --format json | jq ".overall" + fi + fi + + if echo "$upgrade_status" | jq -r ".message" | grep -q -i "error\|fail"; then + echo "" + echo "=== ERROR: Upgrade Failed ===" + echo "Upgrade status shows error or failure" + echo "$upgrade_status" + exit 1 + fi + + if [ $elapsed -ge $TIMEOUT ]; then + echo "" + echo "=== ERROR: Upgrade Timeout ===" + echo "Upgrade did not complete within $TIMEOUT seconds" + echo "Current status:" + echo "$upgrade_status" + ceph versions + exit 1 + fi + + echo "Waiting 60 seconds before next check..." + sleep 60 + done + + echo "" + echo "=== Final Verification ===" + ceph health detail + ceph orch ps + ceph status + + echo "" + echo "=== Upgrade Monitor Completed Successfully ===" + EOF + + chmod +x /tmp/upgrade_monitor.sh + /tmp/upgrade_monitor.sh "v18.2.7" "downgrade" "CES" "Upstream v18.2.7" "1800" + + - echo "POST-DOWNGRADE UPSTREAM VERSION:" + - ceph version + - ceph orch ps + - ceph -s + + +overrides: + ceph: + log-ignorelist: + - CEPHADM_STRAY_DAEMON + - CEPHADM_FAILED_DAEMON + - CEPHADM_AGENT_DOWN + - CEPHADM_DAEMON_PLACE_FAIL + log-only-match: + - CEPHADM_ diff --git a/qa/suites/orch/cephadm/clyso/upgradeMatrix/% b/qa/suites/orch/cephadm/clyso/upgradeMatrix/% new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/orch/cephadm/clyso/upgradeMatrix/cross-distro-pairs/ces-to-upstream.yaml b/qa/suites/orch/cephadm/clyso/upgradeMatrix/cross-distro-pairs/ces-to-upstream.yaml new file mode 100644 index 0000000000000..ea0b2b526a525 --- /dev/null +++ b/qa/suites/orch/cephadm/clyso/upgradeMatrix/cross-distro-pairs/ces-to-upstream.yaml @@ -0,0 +1,9 @@ +# Cross-distro test: Start with CES, upgrade to Upstream, downgrade back to CES +base_image: "harbor.clyso.com/ces/ceph/ceph:ces-v25.03.2-rc.4" +target_image: "quay.io/ceph/ceph:v18.2.7" +base_image_name: "CES-v25.03.2-rc.4" +target_image_name: "Upstream-18.2.7" + +overrides: + ceph: + image: "harbor.clyso.com/ces/ceph/ceph:ces-v25.03.2-rc.4" diff --git a/qa/suites/orch/cephadm/clyso/upgradeMatrix/cross-distro-pairs/upstream-to-ces.yaml b/qa/suites/orch/cephadm/clyso/upgradeMatrix/cross-distro-pairs/upstream-to-ces.yaml new file mode 100644 index 0000000000000..fa221779078ca --- /dev/null +++ b/qa/suites/orch/cephadm/clyso/upgradeMatrix/cross-distro-pairs/upstream-to-ces.yaml @@ -0,0 +1,9 @@ +# Cross-distro test: Start with Upstream, upgrade to CES, downgrade back to Upstream +base_image: "quay.io/ceph/ceph:v18.2.7" +target_image: "harbor.clyso.com/ces/ceph/ceph:ces-v25.03.2-rc.4" +base_image_name: "Upstream-18.2.7" +target_image_name: "CES-v25.03.2-rc.4" + +overrides: + ceph: + image: "quay.io/ceph/ceph:v18.2.7" diff --git a/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/+ b/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/+ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/1-bootstrap.yaml b/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/1-bootstrap.yaml new file mode 100644 index 0000000000000..baaf340d552d1 --- /dev/null +++ b/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/1-bootstrap.yaml @@ -0,0 +1,44 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 +- - host.b + - mgr.b + - osd.1 + - client.0 + +tasks: +# Install system packages via pexec (avoids CEPH RPM installation) +- 
pexec: + all: + - sudo dnf install s3cmd curl jq -y + +- cephadm: + +- cephadm.shell: + host.a: + - echo "=== BOOTSTRAP COMPLETE ===" + - ceph orch status + - ceph orch ps + - ceph version + - ceph -s + - ceph orch device ls + +openstack: +- volumes: + count: 4 + size: 20 + +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true + log-only-match: + - CEPHADM_ + log-ignorelist: + - CEPHADM_DAEMON_PLACE_FAIL + - CEPHADM_FAILED_DAEMON + - CEPHADM_STRAY_DAEMON + - CEPHADM_AGENT_DOWN diff --git a/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/3-upgrade.yaml b/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/3-upgrade.yaml new file mode 100644 index 0000000000000..f94b75375051d --- /dev/null +++ b/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/3-upgrade.yaml @@ -0,0 +1,149 @@ +tasks: +- cephadm.shell: + host.a: + - echo "PRE-UPGRADE STATE:" + - ceph version + - ceph orch ps + - ceph -s + + - echo "Starting upgrade from CES-v25.03.2-rc.4 to reef-18.2.7..." + - ceph orch upgrade start --image "quay.io/ceph/ceph:v18.2.7" + - sleep 30 + + - | + cat > /tmp/upgrade_monitor.sh << 'EOF' + #!/bin/bash + + # Upgrade monitoring script for cephadm upgrade tests + # Monitors upgrade/downgrade completion by checking both upgrade status and daemon versions + + set -e + + TARGET_VERSION="$1" + OPERATION="$2" # "upgrade" or "downgrade" + BASE_IMAGE_NAME="${3:-base}" + TARGET_IMAGE_NAME="${4:-target}" + TIMEOUT="${5:-2400}" + + if [ -z "$TARGET_VERSION" ] || [ -z "$OPERATION" ]; then + echo "Usage: $0 [base_image_name] [target_image_name] [timeout_seconds]" + exit 1 + fi + + echo "=== CEPH Upgrade Monitor Started ===" + echo "Base image: $BASE_IMAGE_NAME" + echo "Target image: $TARGET_IMAGE_NAME" + echo "Operation: $OPERATION to $TARGET_VERSION" + echo "Timeout: ${TIMEOUT}s" + echo "Start time: $(date)" + + echo "=== Capturing Baseline Version ===" + ceph versions + baseline_version=$(ceph versions --format json | jq -r ".overall | keys[0]") + echo "Baseline version: $baseline_version" + + echo "=== Starting Upgrade Monitoring ===" + start_time=$(date +%s) + + while true; do + current_time=$(date +%s) + elapsed=$((current_time - start_time)) + + echo "" + echo "=== Upgrade Status (Elapsed: ${elapsed}s) ===" + echo "Time: $(date)" + + echo "--- Orchestrator Upgrade Status ---" + upgrade_status=$(ceph orch upgrade status --format json) + echo "$upgrade_status" + + echo "--- Daemon Versions ---" + ceph versions + + in_progress=$(echo "$upgrade_status" | jq -r ".in_progress") + version_count=$(ceph versions --format json | jq ".overall | length") + + echo "Upgrade in progress: $in_progress" + echo "Number of different versions running: $version_count" + + if [ "$in_progress" = "false" ] && [ "$version_count" -eq 1 ]; then + current_version=$(ceph versions --format json | jq -r ".overall | keys[0]") + echo "All daemons now on: $current_version" + + if [ "$current_version" != "$baseline_version" ]; then + echo "" + echo "=== SUCCESS: Upgrade Completed ===" + echo "From: $baseline_version" + echo "To: $current_version" + echo "Base image: $BASE_IMAGE_NAME" + echo "Target image: $TARGET_IMAGE_NAME" + echo "Total time: ${elapsed}s" + echo "End time: $(date)" + break + else + echo "" + echo "=== SUCCESS: Already on Target Version ===" + echo "Current version: $current_version" + echo "Base image: $BASE_IMAGE_NAME" + echo "Target image: $TARGET_IMAGE_NAME" + echo "Total time: ${elapsed}s" + echo "End time: $(date)" + break + fi + else + echo "Upgrade still in progress or daemons on mixed versions" 
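+            # more than one key under "overall" in `ceph versions` output means
+            # some daemons are still running the old image, so keep polling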
+ if [ "$version_count" -gt 1 ]; then + echo "--- Version Breakdown ---" + ceph versions --format json | jq ".overall" + fi + fi + + if echo "$upgrade_status" | jq -r ".message" | grep -q -i "error\|fail"; then + echo "" + echo "=== ERROR: Upgrade Failed ===" + echo "Upgrade status shows error or failure" + echo "$upgrade_status" + exit 1 + fi + + if [ $elapsed -ge $TIMEOUT ]; then + echo "" + echo "=== ERROR: Upgrade Timeout ===" + echo "Upgrade did not complete within $TIMEOUT seconds" + echo "Current status:" + echo "$upgrade_status" + ceph versions + exit 1 + fi + + echo "Waiting 60 seconds before next check..." + sleep 60 + done + + echo "" + echo "=== Final Verification ===" + ceph health detail + ceph orch ps + ceph status + + echo "" + echo "=== Upgrade Monitor Completed Successfully ===" + EOF + + chmod +x /tmp/upgrade_monitor.sh + /tmp/upgrade_monitor.sh "18.2.7" "upgrade" "CES-v25.03.2-rc.4" "reef-18.2.7" "2400" + + - echo "POST-UPGRADE STATE:" + - ceph version + - ceph orch ps + - ceph -s + +overrides: + ceph: + log-ignorelist: + - CEPHADM_STRAY_DAEMON + - CEPHADM_FAILED_DAEMON + - CEPHADM_AGENT_DOWN + - CEPHADM_DAEMON_PLACE_FAIL + log-only-match: + - CEPHADM_ diff --git a/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/5-downgrade.yaml b/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/5-downgrade.yaml new file mode 100644 index 0000000000000..25dd72bddca3b --- /dev/null +++ b/qa/suites/orch/cephadm/clyso/upgradeMatrix/workflow/5-downgrade.yaml @@ -0,0 +1,149 @@ +tasks: +- cephadm.shell: + host.a: + - echo "PRE-DOWNGRADE STATE:" + - ceph version + - ceph orch ps + - ceph -s + + - echo "Starting downgrade from reef-18.2.7 back to CES-v25.03.2-rc.4..." + - ceph orch upgrade start --image "harbor.clyso.com/ces/ceph/ceph:ces-v25.03.2-rc.4" + - sleep 30 + + - | + cat > /tmp/upgrade_monitor.sh << 'EOF' + #!/bin/bash + + # Upgrade monitoring script for cephadm upgrade tests + # Monitors upgrade/downgrade completion by checking both upgrade status and daemon versions + + set -e + + TARGET_VERSION="$1" + OPERATION="$2" # "upgrade" or "downgrade" + BASE_IMAGE_NAME="${3:-base}" + TARGET_IMAGE_NAME="${4:-target}" + TIMEOUT="${5:-2400}" + + if [ -z "$TARGET_VERSION" ] || [ -z "$OPERATION" ]; then + echo "Usage: $0 [base_image_name] [target_image_name] [timeout_seconds]" + exit 1 + fi + + echo "=== CEPH Upgrade Monitor Started ===" + echo "Base image: $BASE_IMAGE_NAME" + echo "Target image: $TARGET_IMAGE_NAME" + echo "Operation: $OPERATION to $TARGET_VERSION" + echo "Timeout: ${TIMEOUT}s" + echo "Start time: $(date)" + + echo "=== Capturing Baseline Version ===" + ceph versions + baseline_version=$(ceph versions --format json | jq -r ".overall | keys[0]") + echo "Baseline version: $baseline_version" + + echo "=== Starting Upgrade Monitoring ===" + start_time=$(date +%s) + + while true; do + current_time=$(date +%s) + elapsed=$((current_time - start_time)) + + echo "" + echo "=== Upgrade Status (Elapsed: ${elapsed}s) ===" + echo "Time: $(date)" + + echo "--- Orchestrator Upgrade Status ---" + upgrade_status=$(ceph orch upgrade status --format json) + echo "$upgrade_status" + + echo "--- Daemon Versions ---" + ceph versions + + in_progress=$(echo "$upgrade_status" | jq -r ".in_progress") + version_count=$(ceph versions --format json | jq ".overall | length") + + echo "Upgrade in progress: $in_progress" + echo "Number of different versions running: $version_count" + + if [ "$in_progress" = "false" ] && [ "$version_count" -eq 1 ]; then + current_version=$(ceph versions --format json | jq 
-r ".overall | keys[0]") + echo "All daemons now on: $current_version" + + if [ "$current_version" != "$baseline_version" ]; then + echo "" + echo "=== SUCCESS: Upgrade Completed ===" + echo "From: $baseline_version" + echo "To: $current_version" + echo "Base image: $BASE_IMAGE_NAME" + echo "Target image: $TARGET_IMAGE_NAME" + echo "Total time: ${elapsed}s" + echo "End time: $(date)" + break + else + echo "" + echo "=== SUCCESS: Already on Target Version ===" + echo "Current version: $current_version" + echo "Base image: $BASE_IMAGE_NAME" + echo "Target image: $TARGET_IMAGE_NAME" + echo "Total time: ${elapsed}s" + echo "End time: $(date)" + break + fi + else + echo "Upgrade still in progress or daemons on mixed versions" + if [ "$version_count" -gt 1 ]; then + echo "--- Version Breakdown ---" + ceph versions --format json | jq ".overall" + fi + fi + + if echo "$upgrade_status" | jq -r ".message" | grep -q -i "error\|fail"; then + echo "" + echo "=== ERROR: Upgrade Failed ===" + echo "Upgrade status shows error or failure" + echo "$upgrade_status" + exit 1 + fi + + if [ $elapsed -ge $TIMEOUT ]; then + echo "" + echo "=== ERROR: Upgrade Timeout ===" + echo "Upgrade did not complete within $TIMEOUT seconds" + echo "Current status:" + echo "$upgrade_status" + ceph versions + exit 1 + fi + + echo "Waiting 60 seconds before next check..." + sleep 60 + done + + echo "" + echo "=== Final Verification ===" + ceph health detail + ceph orch ps + ceph status + + echo "" + echo "=== Upgrade Monitor Completed Successfully ===" + EOF + + chmod +x /tmp/upgrade_monitor.sh + /tmp/upgrade_monitor.sh "25.03.2-rc.4" "downgrade" "reef-18.2.7" "CES-v25.03.2-rc.4" "2400" + + - echo "POST-DOWNGRADE STATE:" + - ceph version + - ceph orch ps + - ceph -s + +overrides: + ceph: + log-ignorelist: + - CEPHADM_STRAY_DAEMON + - CEPHADM_FAILED_DAEMON + - CEPHADM_AGENT_DOWN + - CEPHADM_DAEMON_PLACE_FAIL + log-only-match: + - CEPHADM_ From aca9f23efdf6f93267299434f6092d3fbcdd4689 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Thu, 7 Aug 2025 21:49:00 +0000 Subject: [PATCH 02/17] qa/cephadm_s3_bridge.py: test be able to run s3test repo from cephadm Signed-off-by: Joshua Blanch --- .../cephadm/s3tests-bridge/basic-s3tests.yaml | 48 ++ .../s3tests-bridge/bridge-test-only.yaml | 114 +++++ .../cephadm/s3tests-bridge/minimal-test.yaml | 47 ++ qa/tasks/cephadm_s3_bridge.py | 441 ++++++++++++++++++ qa/tasks/python.py | 9 +- 5 files changed, 658 insertions(+), 1 deletion(-) create mode 100644 qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml create mode 100644 qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml create mode 100644 qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml create mode 100644 qa/tasks/cephadm_s3_bridge.py diff --git a/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml b/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml new file mode 100644 index 0000000000000..53d7dfd3844f0 --- /dev/null +++ b/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml @@ -0,0 +1,48 @@ +roles: +- [host.a, mon.a, mgr.a, osd.0, osd.1, osd.2, client.0] + +overrides: + ceph: + log-to-file: true + conf: + global: + log to file: true + mon cluster log to file: true + mon: + mon_warn_on_insecure_global_id_reclaim_allowed: false + +tasks: +- cephadm: + +- cephadm.apply: + specs: + - service_type: rgw + service_id: s3test + placement: + host_pattern: "*" + spec: + rgw_frontend_port: 8080 + +- cephadm.wait_for_service: + service: rgw.s3test + +- cephadm_s3_bridge: + client.0: + discover_from_cephadm: true 
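+
+# the bridge above is expected to publish ctx.rgw.role_endpoints (discovered via
+# 'ceph orch ps'), so the stock tox/s3tests tasks below can target the
+# cephadm-deployed RGW without a separate rgw task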
+ +# note: tox is needed +- tox: [client.0] + +- s3tests: + client.0: + rgw_server: client.0 + force-branch: master + conf: + DEFAULT: + is_secure: false + port: 8080 + calling_format: ordinary + fixtures: + bucket prefix: test-{random}- + # Only run 3 basic tests to verify bridge works + filter: "test_bucket_list_empty or test_bucket_create_naming_good_long_255" diff --git a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml new file mode 100644 index 0000000000000..d1cd13f3664d2 --- /dev/null +++ b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml @@ -0,0 +1,114 @@ +roles: +- [host.a, mon.a, mgr.a, osd.0, osd.1, osd.2, client.0] + +tasks: +- cephadm: + +- cephadm.apply: + specs: + - service_type: rgw + service_id: test + placement: + host_pattern: "*" + spec: + rgw_frontend_port: 8080 + +- cephadm.wait_for_service: + service: rgw.test + +- cephadm_s3_bridge: + client.0: + discover_from_cephadm: true + +- exec: + client.0: + - echo "Bridge task completed - ctx.rgw should now be available for s3tests" + - echo "Testing RGW accessibility..." + +# This uses the same execution path that s3tests uses, so it will trigger our monkey patch +- python: + client.0: + - | + import logging + from io import StringIO + log = logging.getLogger(__name__) + + log.info("=== Testing radosgw-admin monkey patch ===") + + # Get the remote connection (same way s3tests does it) + remote = list(ctx.cluster.only('client.0').remotes.keys())[0] + + log.info("Testing user creation via remote.run() - this should trigger monkey patch") + try: + # This should be intercepted by our monkey patch and converted to cephadm shell + result = remote.run( + args=[ + 'radosgw-admin', 'user', 'create', + '--uid=testuser', + '--display-name=Test User', + '--access-key=testkey', + '--secret-key=testsecret' + ], + stdout=StringIO() + ) + log.info("✅ SUCCESS: radosgw-admin user create executed through monkey patch!") + output = result.stdout.getvalue() if hasattr(result.stdout, 'getvalue') else str(result.stdout) + log.info(f"Command output length: {len(output)} chars") + if 'testuser' in output or 'access_key' in output: + log.info("✅ User creation output looks correct") + else: + log.warning(f"⚠️ Unexpected output: {output[:200]}...") + except Exception as e: + log.error(f"❌ FAILED: radosgw-admin user create failed: {e}") + raise + + log.info("Testing user info retrieval via remote.run()") + try: + result = remote.run( + args=[ + 'radosgw-admin', 'user', 'info', + '--uid=testuser' + ], + stdout=StringIO() + ) + log.info("✅ SUCCESS: radosgw-admin user info executed through monkey patch!") + output = result.stdout.getvalue() if hasattr(result.stdout, 'getvalue') else str(result.stdout) + if 'testuser' in output and 'access_key' in output: + log.info("✅ User info output contains expected fields") + else: + log.warning(f"⚠️ User info output: {output[:200]}...") + except Exception as e: + log.error(f"❌ FAILED: radosgw-admin user info failed: {e}") + raise + + log.info("=== ✅ Monkey patch test completed successfully! ===") + log.info("This confirms that s3tests radosgw-admin commands will work with cephadm") + +# Test S3 endpoint accessibility +- exec: + client.0: + - echo "Testing S3 endpoint accessibility..." 
+ - 'response=$(curl -s http://localhost:8080/ 2>/dev/null || echo "CONNECTION_FAILED")' + - echo "RGW Response: $response" + - 'if echo "$response" | grep -q "ListBucketResult\|Error\|xml\|ACCESS_DENIED"; then echo "✅ RGW is responding with valid S3 response"; else echo "✗ RGW not responding correctly"; fi' + +# Cleanup test user using monkey patch (another test) +- python: + client.0: + - | + import logging + log = logging.getLogger(__name__) + + log.info("=== Testing cleanup via monkey patch ===") + remote = list(ctx.cluster.only('client.0').remotes.keys())[0] + + try: + result = remote.run(args=[ + 'radosgw-admin', 'user', 'rm', + '--uid=testuser' + ]) + log.info("✅ SUCCESS: User cleanup via monkey patch worked!") + except Exception as e: + log.warning(f"⚠️ Cleanup failed (this is often expected): {e}") + + log.info("=== Bridge functionality test completed ===") diff --git a/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml b/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml new file mode 100644 index 0000000000000..f1424fcb6874a --- /dev/null +++ b/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml @@ -0,0 +1,47 @@ +roles: +- [host.a, mon.a, mgr.a, osd.0, osd.1, osd.2, client.0] + +tasks: +- cephadm: + +- cephadm.apply: + specs: + - service_type: rgw + service_id: test + placement: + host_pattern: "*" + spec: + rgw_frontend_port: 8080 + +- cephadm.wait_for_service: + service: rgw.test + +- cephadm_s3_bridge: + client.0: + discover_from_cephadm: true + dns_name: client.0 + +- tox: [client.0] + +- s3tests: + client.0: + rgw_server: client.0 + force-branch: master + conf: + DEFAULT: + is_secure: false + port: 8080 + calling_format: ordinary + fixtures: + bucket prefix: s3test-{random}- + # POC run + exclude: + - test_bucket_policy* + - test_lifecycle* + - test_encryption* + - test_multipart_upload_size_too_small + - test_cors* + - test_website* + - test_logging* + - test_versioning* + filter: "test_bucket_list_empty or test_bucket_create or test_object_write or test_object_read" diff --git a/qa/tasks/cephadm_s3_bridge.py b/qa/tasks/cephadm_s3_bridge.py new file mode 100644 index 0000000000000..d870679fc207f --- /dev/null +++ b/qa/tasks/cephadm_s3_bridge.py @@ -0,0 +1,441 @@ +""" +Bridge task to make cephadm-deployed RGW compatible with s3tests. + +This task discovers RGW endpoints deployed via cephadm orchestrator +and creates the ctx.rgw.role_endpoints structure that s3tests expects. +""" + +import json +import logging +import time +from io import StringIO + +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology.exceptions import ConfigError +import teuthology.orchestra.remote + +import sys +import os + +qa_dir = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, qa_dir) +from rgw import RGWEndpoint + +log = logging.getLogger(__name__) + + +def detect_cephadm_deployment(ctx): + """Detect if we're in a cephadm environment with bridge active""" + return ( + hasattr(ctx, "rgw") + and hasattr(ctx.rgw, "cephadm_bridge_active") + and ctx.rgw.cephadm_bridge_active + ) + + +def patch_s3tests_radosgw_admin(ctx): + """ + Monkey patch teuthology remote execution to make radosgw-admin commands + work inside cephadm containers when running s3tests. 
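+
+    For illustration, a bare ['radosgw-admin', 'user', 'create', ...] invocation
+    is rewritten to roughly:
+
+        ['sudo', 'cephadm', '--image', <image>, 'shell',
+         '-c', '/etc/ceph/<cluster>.conf',
+         '-k', '/etc/ceph/<cluster>.client.admin.keyring',
+         '--fsid', <fsid>, '--', 'radosgw-admin', 'user', 'create', ...]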
+ """ + log.info("convert radosgw-admin to cephadm command") + + original_run = teuthology.orchestra.remote.Remote.run + + def cephadm_aware_run(self, **kwargs): + args = kwargs.get("args", []) + + if args and len(args) > 0 and args[0] == "radosgw-admin": + if detect_cephadm_deployment(ctx): + log.info(f"Intercepting radosgw-admin command: {args}") + + try: + cluster_name = ( + list(ctx.ceph.keys())[0] if hasattr(ctx, "ceph") else "ceph" + ) + image = ctx.ceph[cluster_name].image + + cephadm_args = [ + "sudo", + "cephadm", + "--image", + image, + "shell", + "-c", + f"/etc/ceph/{cluster_name}.conf", + "-k", + f"/etc/ceph/{cluster_name}.client.admin.keyring", + "--fsid", + ctx.ceph[cluster_name].fsid, + "--", + ] + args + + log.info(f"Converted to cephadm shell command: {cephadm_args}") + kwargs["args"] = cephadm_args + + except Exception as e: + log.error(f"Failed to convert radosgw-admin to cephadm shell: {e}") + pass + + return original_run(self, **kwargs) + + + teuthology.orchestra.remote.Remote.run = cephadm_aware_run + + +def restore_original_remote_run(): + """Restore original remote run method (for cleanup)""" + # TODO: In practice, this is tricky to implement cleanly since we don't + # store the original reference. The monkey patch will remain active + # for the duration of the test run, which is typically desired. + log.info("Note: Monkey patch cleanup not implemented - patch remains active") + + +def discover_cephadm_rgw_endpoints(ctx): + """ + Discover RGW endpoints from cephadm orchestrator using cephadm shell. + Returns dict mapping service names to endpoint info. + """ + log.info("Discovering cephadm RGW endpoints via 'ceph orch ps'") + + cluster_roles = list(ctx.cluster.remotes.keys()) + if not cluster_roles: + raise ConfigError("No cluster nodes available for ceph commands") + + remote = cluster_roles[0] + + try: + # Get cluster name (usually 'ceph') + cluster_name = list(ctx.ceph.keys())[0] if hasattr(ctx, "ceph") else "ceph" + + result = remote.run( + args=[ + "sudo", + ctx.cephadm, + "--image", + ctx.ceph[cluster_name].image, + "shell", + "-c", + f"/etc/ceph/{cluster_name}.conf", + "-k", + f"/etc/ceph/{cluster_name}.client.admin.keyring", + "--fsid", + ctx.ceph[cluster_name].fsid, + "--", + "ceph", + "orch", + "ps", + "--daemon_type", + "rgw", + "--format", + "json", + ], + stdout=StringIO(), + ) + except AttributeError as e: + log.error(f"Missing cephadm context attributes: {e}") + log.error( + "Available ctx.cephadm attributes: " + str(dir(ctx.cephadm)) + if hasattr(ctx, "cephadm") + else "No ctx.cephadm found" + ) + raise ConfigError(f"cephadm context not properly initialized: {e}") + except Exception as e: + log.error(f"Failed to run ceph orch ps command: {e}") + raise ConfigError(f"RGW endpoint discovery failed: {e}") + + services_json = result.stdout.getvalue() + log.info(f"Raw ceph orch ps output: {services_json}") + + if not services_json.strip(): + log.warning("No RGW services found via 'ceph orch ps'") + return {} + + try: + services = json.loads(services_json) + log.info(f"Parsed RGW services: {services}") + except json.JSONDecodeError as e: + log.error(f"Failed to parse JSON from ceph orch ps: {e}") + log.error(f"Raw output was: {services_json}") + raise ConfigError(f"Invalid JSON from ceph orch ps: {e}") + + endpoints = {} + for service in services: + service_name = service.get("service_name", "") + hostname = service.get("hostname", "") + ports = service.get("ports", []) + status = service.get("status_desc", "") + + if not service_name.startswith("rgw."): + 
continue + + if status != "running": + log.warning(f"RGW service {service_name} is not running: {status}") + continue + + if not ports: + log.warning(f"No ports found for RGW service {service_name}") + continue + + # Extract port number (ports is typically ['8080/tcp'] format) + port = None + for port_spec in ports: + if isinstance(port_spec, str) and "/" in port_spec: + port = int(port_spec.split("/")[0]) + break + elif isinstance(port_spec, int): + port = port_spec + break + + if port is None: + log.warning(f"Could not parse port for RGW service {service_name}: {ports}") + continue + + endpoints[service_name] = { + "hostname": hostname, + "port": port, + "service_name": service_name, + "status": status, + } + + log.info(f"Discovered RGW endpoints: {endpoints}") + return endpoints + + +def map_roles_to_endpoints(ctx, config, discovered_endpoints): + """ + Map teuthology roles to discovered RGW endpoints. + """ + role_endpoints = {} + + for role, client_config in config.items(): + if not client_config.get("discover_from_cephadm"): + continue + + log.info(f"Mapping role {role} to cephadm RGW endpoint") + + target_service = client_config.get("rgw_service") + if target_service and target_service in discovered_endpoints: + endpoint_info = discovered_endpoints[target_service] + log.info(f"Using explicit service mapping: {role} -> {target_service}") + else: + if not discovered_endpoints: + raise ConfigError(f"No RGW endpoints discovered for role {role}") + + service_name = list(discovered_endpoints.keys())[0] + endpoint_info = discovered_endpoints[service_name] + log.info(f"Using first available RGW service: {role} -> {service_name}") + + hostname = endpoint_info["hostname"] + port = endpoint_info["port"] + + dns_name = client_config.get("dns_name", hostname) + + rgw_endpoint = RGWEndpoint( + hostname=hostname, + port=port, + cert=None, + dns_name=dns_name, + website_dns_name=None, + ) + + role_endpoints[role] = rgw_endpoint + log.info(f"Created endpoint for {role}: {hostname}:{port} (dns: {dns_name})") + + return role_endpoints + + +def wait_for_rgw_accessibility(ctx, role_endpoints, timeout=60): + """ + Wait for RGW endpoints to be accessible via HTTP. + """ + log.info("Verifying RGW endpoint accessibility") + + cluster_roles = list(ctx.cluster.remotes.keys()) + test_remote = cluster_roles[0] + + for role, endpoint in role_endpoints.items(): + log.info( + f"Testing accessibility of {role} at {endpoint.hostname}:{endpoint.port}" + ) + + start_time = time.time() + accessible = False + + while time.time() - start_time < timeout: + try: + result = test_remote.run( + args=[ + "curl", + "-s", + "-o", + "/dev/null", + "-w", + "%{http_code}", + "--connect-timeout", + "5", + f"http://{endpoint.hostname}:{endpoint.port}/", + ], + stdout=StringIO(), + check_status=False, + ) + + http_code = result.stdout.getvalue().strip() + log.info(f"HTTP response from {role}: {http_code}") + + if http_code and http_code.isdigit(): + accessible = True + break + + except Exception as e: + log.debug(f"Accessibility test failed for {role}: {e}") + + log.info(f"Waiting for {role} to become accessible...") + time.sleep(2) + + if not accessible: + raise ConfigError(f"RGW endpoint {role} not accessible after {timeout}s") + + log.info(f"RGW endpoint {role} is accessible") + + +def task(ctx, config): + """ + Bridge task to make cephadm-deployed RGW compatible with s3tests. 
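+
+    Discovers running rgw daemons via 'ceph orch ps', maps them to the configured
+    client roles, waits for the endpoints to answer HTTP, and then publishes
+    ctx.rgw.role_endpoints (plus a cephadm_bridge_active flag) for downstream
+    tasks such as s3tests.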
+ + Example usage: + - cephadm_s3_bridge: + client.0: + discover_from_cephadm: true + dns_name: rgw.example.com # optional + rgw_service: rgw.myservice # optional, defaults to first found + """ + if config is None: + config = {} + + log.info("🚀 STARTING cephadm s3tests bridge task") + log.info(f"🔍 DEBUG: Bridge task config: {config}") + + # Extensive context debugging + log.info("🔍 DEBUG: Checking available context attributes...") + log.info(f"🔍 DEBUG: hasattr(ctx, 'ceph') = {hasattr(ctx, 'ceph')}") + log.info(f"🔍 DEBUG: hasattr(ctx, 'cephadm') = {hasattr(ctx, 'cephadm')}") + log.info(f"🔍 DEBUG: hasattr(ctx, 'cluster') = {hasattr(ctx, 'cluster')}") + log.info(f"🔍 DEBUG: hasattr(ctx, 'rgw') = {hasattr(ctx, 'rgw')} (should be False initially)") + + if hasattr(ctx, 'ceph'): + log.info(f"🔍 DEBUG: ctx.ceph keys: {list(ctx.ceph.keys())}") + for cluster_name in ctx.ceph.keys(): + log.info(f"🔍 DEBUG: ctx.ceph[{cluster_name}] attributes: {dir(ctx.ceph[cluster_name])}") + if hasattr(ctx.ceph[cluster_name], 'image'): + log.info(f"🔍 DEBUG: ctx.ceph[{cluster_name}].image = {ctx.ceph[cluster_name].image}") + if hasattr(ctx.ceph[cluster_name], 'fsid'): + log.info(f"🔍 DEBUG: ctx.ceph[{cluster_name}].fsid = {ctx.ceph[cluster_name].fsid}") + else: + log.error("❌ ERROR: ctx.ceph not found - this is critical!") + + if hasattr(ctx, 'cephadm'): + log.info(f"🔍 DEBUG: type(ctx.cephadm) = {type(ctx.cephadm)}") + log.info(f"🔍 DEBUG: ctx.cephadm = {ctx.cephadm}") + else: + log.error("❌ ERROR: ctx.cephadm not found") + + try: + log.info("🔍 Phase 1: Attempting RGW endpoint discovery...") + discovered_endpoints = discover_cephadm_rgw_endpoints(ctx) + log.info(f"✅ SUCCESS: Discovered {len(discovered_endpoints)} RGW endpoints") + except Exception as e: + log.error(f"❌ CRITICAL: RGW endpoint discovery failed: {e}") + log.error(f"❌ Bridge task cannot continue - ctx.rgw will NOT be created!") + raise e + + if not discovered_endpoints: + log.error("❌ CRITICAL: No RGW services found via cephadm orchestrator") + log.error("❌ Bridge task cannot continue - ctx.rgw will NOT be created!") + raise ConfigError("No RGW services found via cephadm orchestrator") + + log.info("🔍 Phase 2: Mapping roles to endpoints...") + role_endpoints = map_roles_to_endpoints(ctx, config, discovered_endpoints) + + if not role_endpoints: + log.error("❌ CRITICAL: No roles configured for RGW endpoint mapping") + log.error("❌ Bridge task cannot continue - ctx.rgw will NOT be created!") + log.error("❌ Check your bridge task configuration - you need at least one role with 'discover_from_cephadm: true'") + return + + log.info(f"✅ SUCCESS: Mapped {len(role_endpoints)} roles to endpoints") + for role, endpoint in role_endpoints.items(): + log.info(f" 🔗 {role} -> {endpoint.hostname}:{endpoint.port}") + + log.info("🔍 Phase 3: Testing RGW endpoint accessibility...") + try: + wait_for_rgw_accessibility(ctx, role_endpoints) + log.info("✅ SUCCESS: All RGW endpoints are accessible") + except Exception as e: + log.error(f"❌ ERROR: RGW accessibility test failed: {e}") + log.error("❌ Continuing anyway - ctx.rgw will still be created") + + log.info("🔍 Phase 4: Creating ctx.rgw structure for s3tests compatibility...") + + # Store original state for debugging + original_rgw_exists = hasattr(ctx, 'rgw') + log.info(f"🔍 DEBUG: Before creation - hasattr(ctx, 'rgw') = {original_rgw_exists}") + + # Phase 4: Create ctx.rgw structure for s3tests compatibility + # Using simple class instead of dynamic type creation for better compatibility + class RGWContext: + pass + + ctx.rgw = 
RGWContext() + ctx.rgw.role_endpoints = role_endpoints + + log.info(f"🔍 DEBUG: After creation - hasattr(ctx, 'rgw') = {hasattr(ctx, 'rgw')}") + log.info(f"🔍 DEBUG: type(ctx.rgw) = {type(ctx.rgw)}") + log.info(f"🔍 DEBUG: hasattr(ctx.rgw, 'role_endpoints') = {hasattr(ctx.rgw, 'role_endpoints')}") + log.info(f"🔍 DEBUG: len(ctx.rgw.role_endpoints) = {len(ctx.rgw.role_endpoints)}") + + log.info(f"✅ SUCCESS: Created ctx.rgw.role_endpoints with {len(role_endpoints)} endpoints") + for role, endpoint in role_endpoints.items(): + log.info(f" 🔗 {role} -> {endpoint.hostname}:{endpoint.port}") + + # Phase 5: Store discovery info and activate bridge + ctx.rgw.cephadm_discovered_endpoints = discovered_endpoints + ctx.rgw.cephadm_bridge_active = True + + log.info(f"🔍 DEBUG: Set ctx.rgw.cephadm_bridge_active = {ctx.rgw.cephadm_bridge_active}") + + # Phase 6: Patch radosgw-admin commands for cephadm compatibility + log.info("🔍 Phase 5: Setting up radosgw-admin monkey patching...") + try: + patch_s3tests_radosgw_admin(ctx) + log.info("✅ SUCCESS: Monkey patch for radosgw-admin commands activated") + except Exception as e: + log.error(f"❌ ERROR: Monkey patch setup failed: {e}") + raise e + + # Final verification for s3tests compatibility + log.info("🔍 FINAL VERIFICATION: Checking s3tests compatibility...") + log.info(f"✅ hasattr(ctx, 'rgw') = {hasattr(ctx, 'rgw')}") + log.info(f"✅ type(ctx.rgw) = {type(ctx.rgw)}") + log.info(f"✅ hasattr(ctx.rgw, 'role_endpoints') = {hasattr(ctx.rgw, 'role_endpoints')}") + log.info(f"✅ ctx.rgw.cephadm_bridge_active = {getattr(ctx.rgw, 'cephadm_bridge_active', 'MISSING')}") + log.info(f"✅ len(ctx.rgw.role_endpoints) = {len(getattr(ctx.rgw, 'role_endpoints', {}))}") + + log.info("🎉 SUCCESS: cephadm s3tests bridge task completed successfully!") + log.info("🎉 ctx.rgw is now ready for s3tests - the assertion should PASS!") + + try: + yield + finally: + # Cleanup logging with more detail + log.info("🔄 BRIDGE CLEANUP: Starting bridge task cleanup...") + log.info("🔄 Note: Monkey patch remains active for test duration (this is expected)") + log.info(f"🔄 Final state: hasattr(ctx, 'rgw') = {hasattr(ctx, 'rgw')}") + if hasattr(ctx, 'rgw'): + log.info(f"🔄 Final state: hasattr(ctx.rgw, 'cephadm_bridge_active') = {hasattr(ctx.rgw, 'cephadm_bridge_active')}") + log.info(f"🔄 Final state: len(ctx.rgw.role_endpoints) = {len(getattr(ctx.rgw, 'role_endpoints', {}))}") + log.info(f"🔄 Final state: ctx.rgw.cephadm_bridge_active = {getattr(ctx.rgw, 'cephadm_bridge_active', 'MISSING')}") + else: + log.error("🔄 ❌ CRITICAL: ctx.rgw was lost during test execution!") + log.info("🔄 Bridge task cleanup completed") diff --git a/qa/tasks/python.py b/qa/tasks/python.py index 4ddb14f714538..aacb01ee45426 100644 --- a/qa/tasks/python.py +++ b/qa/tasks/python.py @@ -35,11 +35,18 @@ def task(ctx, config): (remote,) = ctx.cluster.only(role).remotes.keys() log.info('Running python on role %s host %s', role, remote.name) log.info(code) + + # Handle both string and list input for code + if isinstance(code, list): + code_str = '\n'.join(code) + else: + code_str = code + args=[ 'TESTDIR={tdir}'.format(tdir=testdir), 'python3', ] if sudo: args = ['sudo'] + args - remote.run(args=args, stdin=subst_vip(ctx, code)) + remote.run(args=args, stdin=subst_vip(ctx, code_str)) From 0857e18563208584d63b257baddab169e60fb6b8 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 04:26:47 +0000 Subject: [PATCH 03/17] qa/s3bridge: asserts for ctx properties Signed-off-by: Joshua Blanch --- qa/tasks/cephadm_s3_bridge.py | 112 
+++++++--------------------------- 1 file changed, 22 insertions(+), 90 deletions(-) diff --git a/qa/tasks/cephadm_s3_bridge.py b/qa/tasks/cephadm_s3_bridge.py index d870679fc207f..f4dcdca69365b 100644 --- a/qa/tasks/cephadm_s3_bridge.py +++ b/qa/tasks/cephadm_s3_bridge.py @@ -314,128 +314,60 @@ def task(ctx, config): if config is None: config = {} - log.info("🚀 STARTING cephadm s3tests bridge task") - log.info(f"🔍 DEBUG: Bridge task config: {config}") - - # Extensive context debugging - log.info("🔍 DEBUG: Checking available context attributes...") - log.info(f"🔍 DEBUG: hasattr(ctx, 'ceph') = {hasattr(ctx, 'ceph')}") - log.info(f"🔍 DEBUG: hasattr(ctx, 'cephadm') = {hasattr(ctx, 'cephadm')}") - log.info(f"🔍 DEBUG: hasattr(ctx, 'cluster') = {hasattr(ctx, 'cluster')}") - log.info(f"🔍 DEBUG: hasattr(ctx, 'rgw') = {hasattr(ctx, 'rgw')} (should be False initially)") - - if hasattr(ctx, 'ceph'): - log.info(f"🔍 DEBUG: ctx.ceph keys: {list(ctx.ceph.keys())}") - for cluster_name in ctx.ceph.keys(): - log.info(f"🔍 DEBUG: ctx.ceph[{cluster_name}] attributes: {dir(ctx.ceph[cluster_name])}") - if hasattr(ctx.ceph[cluster_name], 'image'): - log.info(f"🔍 DEBUG: ctx.ceph[{cluster_name}].image = {ctx.ceph[cluster_name].image}") - if hasattr(ctx.ceph[cluster_name], 'fsid'): - log.info(f"🔍 DEBUG: ctx.ceph[{cluster_name}].fsid = {ctx.ceph[cluster_name].fsid}") - else: - log.error("❌ ERROR: ctx.ceph not found - this is critical!") - - if hasattr(ctx, 'cephadm'): - log.info(f"🔍 DEBUG: type(ctx.cephadm) = {type(ctx.cephadm)}") - log.info(f"🔍 DEBUG: ctx.cephadm = {ctx.cephadm}") - else: - log.error("❌ ERROR: ctx.cephadm not found") + # Critical context assertions - fail fast if something is missing + assert hasattr(ctx, 'ceph'), 'ctx.ceph not found - cephadm bridge requires ceph context' + assert hasattr(ctx, 'cephadm'), 'ctx.cephadm not found - cephadm bridge requires cephadm context' + assert hasattr(ctx, 'cluster'), 'ctx.cluster not found - cephadm bridge requires cluster context' + assert not hasattr(ctx, 'rgw'), 'ctx.rgw already exists - bridge should run before rgw task' try: - log.info("🔍 Phase 1: Attempting RGW endpoint discovery...") discovered_endpoints = discover_cephadm_rgw_endpoints(ctx) - log.info(f"✅ SUCCESS: Discovered {len(discovered_endpoints)} RGW endpoints") except Exception as e: - log.error(f"❌ CRITICAL: RGW endpoint discovery failed: {e}") - log.error(f"❌ Bridge task cannot continue - ctx.rgw will NOT be created!") + log.error(f"RGW endpoint discovery failed: {e}") raise e if not discovered_endpoints: - log.error("❌ CRITICAL: No RGW services found via cephadm orchestrator") - log.error("❌ Bridge task cannot continue - ctx.rgw will NOT be created!") raise ConfigError("No RGW services found via cephadm orchestrator") - log.info("🔍 Phase 2: Mapping roles to endpoints...") role_endpoints = map_roles_to_endpoints(ctx, config, discovered_endpoints) if not role_endpoints: - log.error("❌ CRITICAL: No roles configured for RGW endpoint mapping") - log.error("❌ Bridge task cannot continue - ctx.rgw will NOT be created!") - log.error("❌ Check your bridge task configuration - you need at least one role with 'discover_from_cephadm: true'") + log.error("No roles configured for RGW endpoint mapping") + log.error("Check your bridge task configuration - you need at least one role with 'discover_from_cephadm: true'") return - log.info(f"✅ SUCCESS: Mapped {len(role_endpoints)} roles to endpoints") - for role, endpoint in role_endpoints.items(): - log.info(f" 🔗 {role} -> {endpoint.hostname}:{endpoint.port}") - - 
log.info("🔍 Phase 3: Testing RGW endpoint accessibility...") try: wait_for_rgw_accessibility(ctx, role_endpoints) - log.info("✅ SUCCESS: All RGW endpoints are accessible") except Exception as e: - log.error(f"❌ ERROR: RGW accessibility test failed: {e}") - log.error("❌ Continuing anyway - ctx.rgw will still be created") - - log.info("🔍 Phase 4: Creating ctx.rgw structure for s3tests compatibility...") - - # Store original state for debugging - original_rgw_exists = hasattr(ctx, 'rgw') - log.info(f"🔍 DEBUG: Before creation - hasattr(ctx, 'rgw') = {original_rgw_exists}") + log.error(f"RGW accessibility test failed: {e}") + log.error("Continuing anyway - ctx.rgw will still be created") - # Phase 4: Create ctx.rgw structure for s3tests compatibility - # Using simple class instead of dynamic type creation for better compatibility + # Create ctx.rgw structure for s3tests compatibility class RGWContext: pass ctx.rgw = RGWContext() ctx.rgw.role_endpoints = role_endpoints - - log.info(f"🔍 DEBUG: After creation - hasattr(ctx, 'rgw') = {hasattr(ctx, 'rgw')}") - log.info(f"🔍 DEBUG: type(ctx.rgw) = {type(ctx.rgw)}") - log.info(f"🔍 DEBUG: hasattr(ctx.rgw, 'role_endpoints') = {hasattr(ctx.rgw, 'role_endpoints')}") - log.info(f"🔍 DEBUG: len(ctx.rgw.role_endpoints) = {len(ctx.rgw.role_endpoints)}") - - log.info(f"✅ SUCCESS: Created ctx.rgw.role_endpoints with {len(role_endpoints)} endpoints") - for role, endpoint in role_endpoints.items(): - log.info(f" 🔗 {role} -> {endpoint.hostname}:{endpoint.port}") - - # Phase 5: Store discovery info and activate bridge ctx.rgw.cephadm_discovered_endpoints = discovered_endpoints ctx.rgw.cephadm_bridge_active = True - - log.info(f"🔍 DEBUG: Set ctx.rgw.cephadm_bridge_active = {ctx.rgw.cephadm_bridge_active}") - # Phase 6: Patch radosgw-admin commands for cephadm compatibility - log.info("🔍 Phase 5: Setting up radosgw-admin monkey patching...") + # Setup radosgw-admin monkey patching try: patch_s3tests_radosgw_admin(ctx) - log.info("✅ SUCCESS: Monkey patch for radosgw-admin commands activated") except Exception as e: - log.error(f"❌ ERROR: Monkey patch setup failed: {e}") + log.error(f"Monkey patch setup failed: {e}") raise e - # Final verification for s3tests compatibility - log.info("🔍 FINAL VERIFICATION: Checking s3tests compatibility...") - log.info(f"✅ hasattr(ctx, 'rgw') = {hasattr(ctx, 'rgw')}") - log.info(f"✅ type(ctx.rgw) = {type(ctx.rgw)}") - log.info(f"✅ hasattr(ctx.rgw, 'role_endpoints') = {hasattr(ctx.rgw, 'role_endpoints')}") - log.info(f"✅ ctx.rgw.cephadm_bridge_active = {getattr(ctx.rgw, 'cephadm_bridge_active', 'MISSING')}") - log.info(f"✅ len(ctx.rgw.role_endpoints) = {len(getattr(ctx.rgw, 'role_endpoints', {}))}") - - log.info("🎉 SUCCESS: cephadm s3tests bridge task completed successfully!") - log.info("🎉 ctx.rgw is now ready for s3tests - the assertion should PASS!") + # Final verification assertions + assert hasattr(ctx, 'rgw'), 'ctx.rgw was not created successfully' + assert hasattr(ctx.rgw, 'role_endpoints'), 'ctx.rgw.role_endpoints was not created' + assert hasattr(ctx.rgw, 'cephadm_bridge_active'), 'ctx.rgw.cephadm_bridge_active was not set' + assert ctx.rgw.cephadm_bridge_active, 'ctx.rgw.cephadm_bridge_active is not True' + assert len(ctx.rgw.role_endpoints) > 0, 'ctx.rgw.role_endpoints is empty' try: yield finally: - # Cleanup logging with more detail - log.info("🔄 BRIDGE CLEANUP: Starting bridge task cleanup...") - log.info("🔄 Note: Monkey patch remains active for test duration (this is expected)") - log.info(f"🔄 Final state: hasattr(ctx, 
'rgw') = {hasattr(ctx, 'rgw')}") - if hasattr(ctx, 'rgw'): - log.info(f"🔄 Final state: hasattr(ctx.rgw, 'cephadm_bridge_active') = {hasattr(ctx.rgw, 'cephadm_bridge_active')}") - log.info(f"🔄 Final state: len(ctx.rgw.role_endpoints) = {len(getattr(ctx.rgw, 'role_endpoints', {}))}") - log.info(f"🔄 Final state: ctx.rgw.cephadm_bridge_active = {getattr(ctx.rgw, 'cephadm_bridge_active', 'MISSING')}") - else: - log.error("🔄 ❌ CRITICAL: ctx.rgw was lost during test execution!") - log.info("🔄 Bridge task cleanup completed") + # Verify ctx.rgw survived test execution + assert hasattr(ctx, 'rgw'), 'ctx.rgw was lost during test execution' + assert hasattr(ctx.rgw, 'cephadm_bridge_active'), 'ctx.rgw.cephadm_bridge_active was lost' From 917432095f1d62002e9713e9293cc9815f2de648 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 04:34:11 +0000 Subject: [PATCH 04/17] qa/s3bridge: more asserts Signed-off-by: Joshua Blanch --- qa/tasks/cephadm_s3_bridge.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/qa/tasks/cephadm_s3_bridge.py b/qa/tasks/cephadm_s3_bridge.py index f4dcdca69365b..a935fd3fdec8e 100644 --- a/qa/tasks/cephadm_s3_bridge.py +++ b/qa/tasks/cephadm_s3_bridge.py @@ -318,7 +318,10 @@ def task(ctx, config): assert hasattr(ctx, 'ceph'), 'ctx.ceph not found - cephadm bridge requires ceph context' assert hasattr(ctx, 'cephadm'), 'ctx.cephadm not found - cephadm bridge requires cephadm context' assert hasattr(ctx, 'cluster'), 'ctx.cluster not found - cephadm bridge requires cluster context' - assert not hasattr(ctx, 'rgw'), 'ctx.rgw already exists - bridge should run before rgw task' + + # Allow ctx.rgw to exist from cephadm tasks, but ensure it doesn't have role_endpoints + if hasattr(ctx, 'rgw') and hasattr(ctx.rgw, 'role_endpoints'): + raise ConfigError('ctx.rgw.role_endpoints already exists - bridge should run before other rgw configuration tasks') try: discovered_endpoints = discover_cephadm_rgw_endpoints(ctx) @@ -343,10 +346,11 @@ def task(ctx, config): log.error("Continuing anyway - ctx.rgw will still be created") # Create ctx.rgw structure for s3tests compatibility - class RGWContext: - pass - - ctx.rgw = RGWContext() + if not hasattr(ctx, 'rgw'): + class RGWContext: + pass + ctx.rgw = RGWContext() + ctx.rgw.role_endpoints = role_endpoints ctx.rgw.cephadm_discovered_endpoints = discovered_endpoints ctx.rgw.cephadm_bridge_active = True From a53ad01feaa45cd2326efc6e8cc80520a2989ffa Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 04:42:24 +0000 Subject: [PATCH 05/17] s3bridge: debug logs Signed-off-by: Joshua Blanch --- qa/tasks/cephadm_s3_bridge.py | 62 +++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/qa/tasks/cephadm_s3_bridge.py b/qa/tasks/cephadm_s3_bridge.py index a935fd3fdec8e..e4be2a24fed25 100644 --- a/qa/tasks/cephadm_s3_bridge.py +++ b/qa/tasks/cephadm_s3_bridge.py @@ -80,16 +80,12 @@ def cephadm_aware_run(self, **kwargs): return original_run(self, **kwargs) - teuthology.orchestra.remote.Remote.run = cephadm_aware_run def restore_original_remote_run(): """Restore original remote run method (for cleanup)""" - # TODO: In practice, this is tricky to implement cleanly since we don't - # store the original reference. The monkey patch will remain active - # for the duration of the test run, which is typically desired. 
- log.info("Note: Monkey patch cleanup not implemented - patch remains active") + log.info("not implemented - patch remains active") def discover_cephadm_rgw_endpoints(ctx): @@ -314,14 +310,25 @@ def task(ctx, config): if config is None: config = {} - # Critical context assertions - fail fast if something is missing - assert hasattr(ctx, 'ceph'), 'ctx.ceph not found - cephadm bridge requires ceph context' - assert hasattr(ctx, 'cephadm'), 'ctx.cephadm not found - cephadm bridge requires cephadm context' - assert hasattr(ctx, 'cluster'), 'ctx.cluster not found - cephadm bridge requires cluster context' - + log.info("Starting cephadm s3tests bridge task") + + assert hasattr(ctx, "ceph"), ( + "ctx.ceph not found - cephadm bridge requires ceph context" + ) + assert hasattr(ctx, "cephadm"), ( + "ctx.cephadm not found - cephadm bridge requires cephadm context" + ) + assert hasattr(ctx, "cluster"), ( + "ctx.cluster not found - cephadm bridge requires cluster context" + ) + + log.info("Context assertions passed, checking for existing ctx.rgw...") + # Allow ctx.rgw to exist from cephadm tasks, but ensure it doesn't have role_endpoints - if hasattr(ctx, 'rgw') and hasattr(ctx.rgw, 'role_endpoints'): - raise ConfigError('ctx.rgw.role_endpoints already exists - bridge should run before other rgw configuration tasks') + if hasattr(ctx, "rgw") and hasattr(ctx.rgw, "role_endpoints"): + raise ConfigError( + "ctx.rgw.role_endpoints already exists - bridge should run before other rgw configuration tasks" + ) try: discovered_endpoints = discover_cephadm_rgw_endpoints(ctx) @@ -336,7 +343,9 @@ def task(ctx, config): if not role_endpoints: log.error("No roles configured for RGW endpoint mapping") - log.error("Check your bridge task configuration - you need at least one role with 'discover_from_cephadm: true'") + log.error( + "Check your bridge task configuration - you need at least one role with 'discover_from_cephadm: true'" + ) return try: @@ -346,32 +355,35 @@ def task(ctx, config): log.error("Continuing anyway - ctx.rgw will still be created") # Create ctx.rgw structure for s3tests compatibility - if not hasattr(ctx, 'rgw'): + if not hasattr(ctx, "rgw"): + class RGWContext: pass + ctx.rgw = RGWContext() - + ctx.rgw.role_endpoints = role_endpoints ctx.rgw.cephadm_discovered_endpoints = discovered_endpoints ctx.rgw.cephadm_bridge_active = True - # Setup radosgw-admin monkey patching try: patch_s3tests_radosgw_admin(ctx) except Exception as e: log.error(f"Monkey patch setup failed: {e}") raise e - # Final verification assertions - assert hasattr(ctx, 'rgw'), 'ctx.rgw was not created successfully' - assert hasattr(ctx.rgw, 'role_endpoints'), 'ctx.rgw.role_endpoints was not created' - assert hasattr(ctx.rgw, 'cephadm_bridge_active'), 'ctx.rgw.cephadm_bridge_active was not set' - assert ctx.rgw.cephadm_bridge_active, 'ctx.rgw.cephadm_bridge_active is not True' - assert len(ctx.rgw.role_endpoints) > 0, 'ctx.rgw.role_endpoints is empty' + assert hasattr(ctx, "rgw"), "ctx.rgw was not created successfully" + assert hasattr(ctx.rgw, "role_endpoints"), "ctx.rgw.role_endpoints was not created" + assert hasattr(ctx.rgw, "cephadm_bridge_active"), ( + "ctx.rgw.cephadm_bridge_active was not set" + ) + assert ctx.rgw.cephadm_bridge_active, "ctx.rgw.cephadm_bridge_active is not True" + assert len(ctx.rgw.role_endpoints) > 0, "ctx.rgw.role_endpoints is empty" try: yield finally: - # Verify ctx.rgw survived test execution - assert hasattr(ctx, 'rgw'), 'ctx.rgw was lost during test execution' - assert hasattr(ctx.rgw, 
'cephadm_bridge_active'), 'ctx.rgw.cephadm_bridge_active was lost' + assert hasattr(ctx, "rgw"), "ctx.rgw was lost during test execution" + assert hasattr(ctx.rgw, "cephadm_bridge_active"), ( + "ctx.rgw.cephadm_bridge_active was lost" + ) From db85d1e3cff13591c7a1844fff31a5d3c2cb57cf Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 06:00:07 +0000 Subject: [PATCH 06/17] s3bridge: finds 'radosgw-admin' anywhere in args, splits prefix/admin_and_rest, builds new_args = prefix + cephadm_prefix + admin_and_rest, Signed-off-by: Joshua Blanch --- qa/tasks/cephadm_s3_bridge.py | 74 +++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/qa/tasks/cephadm_s3_bridge.py b/qa/tasks/cephadm_s3_bridge.py index e4be2a24fed25..b22245ffe2138 100644 --- a/qa/tasks/cephadm_s3_bridge.py +++ b/qa/tasks/cephadm_s3_bridge.py @@ -38,45 +38,59 @@ def patch_s3tests_radosgw_admin(ctx): """ Monkey patch teuthology remote execution to make radosgw-admin commands work inside cephadm containers when running s3tests. + + Many teuthology tasks (eg. s3tests, rgw helpers) invoke radosgw-admin with + wrapper prefixes like ["adjust-ulimits", "ceph-coverage", , ... , + "radosgw-admin", ...]. The original patch only matched when args[0] was + "radosgw-admin" which missed these cases. Here we detect radosgw-admin at + any position, split the prefix, and wrap only the radosgw-admin portion + inside a 'sudo shell -c ... -k ... -- ' call. """ - log.info("convert radosgw-admin to cephadm command") + log.info("Enabling cephadm-aware radosgw-admin monkey patch for s3tests") original_run = teuthology.orchestra.remote.Remote.run def cephadm_aware_run(self, **kwargs): args = kwargs.get("args", []) - if args and len(args) > 0 and args[0] == "radosgw-admin": - if detect_cephadm_deployment(ctx): + try: + # Locate the radosgw-admin binary within args (not just at index 0) + admin_idx = -1 + for i, a in enumerate(args): + if isinstance(a, str) and a == "radosgw-admin": + admin_idx = i + break + + if admin_idx != -1 and detect_cephadm_deployment(ctx): log.info(f"Intercepting radosgw-admin command: {args}") - try: - cluster_name = ( - list(ctx.ceph.keys())[0] if hasattr(ctx, "ceph") else "ceph" - ) - image = ctx.ceph[cluster_name].image - - cephadm_args = [ - "sudo", - "cephadm", - "--image", - image, - "shell", - "-c", - f"/etc/ceph/{cluster_name}.conf", - "-k", - f"/etc/ceph/{cluster_name}.client.admin.keyring", - "--fsid", - ctx.ceph[cluster_name].fsid, - "--", - ] + args - - log.info(f"Converted to cephadm shell command: {cephadm_args}") - kwargs["args"] = cephadm_args - - except Exception as e: - log.error(f"Failed to convert radosgw-admin to cephadm shell: {e}") - pass + cluster_name = list(ctx.ceph.keys())[0] if hasattr(ctx, "ceph") else "ceph" + image = ctx.ceph[cluster_name].image + fsid = ctx.ceph[cluster_name].fsid + cephadm_bin = getattr(ctx, "cephadm", "cephadm") + + # Everything before radosgw-admin should remain as-is + prefix = list(args[:admin_idx]) + admin_and_rest = list(args[admin_idx:]) + + cephadm_prefix = [ + "sudo", + cephadm_bin, + "--image", image, + "shell", + "-c", f"/etc/ceph/{cluster_name}.conf", + "-k", f"/etc/ceph/{cluster_name}.client.admin.keyring", + "--fsid", fsid, + "--", + ] + + new_args = prefix + cephadm_prefix + admin_and_rest + log.info(f"Converted to cephadm shell command: {new_args}") + kwargs["args"] = new_args + + except Exception as e: + # On any failure, fall back to original behavior + log.error(f"cephadm radosgw-admin monkey patch error: 
{e}") return original_run(self, **kwargs) From 8f2fa488e6b397c36f9725e86c7fe15f69371e4d Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 06:30:53 +0000 Subject: [PATCH 07/17] bump Signed-off-by: Joshua Blanch --- .../s3tests-bridge/bridge-test-only.yaml | 92 ++++--------------- qa/tasks/cephadm_s3_bridge.py | 44 ++++++--- 2 files changed, 45 insertions(+), 91 deletions(-) diff --git a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml index d1cd13f3664d2..80c42d2339fdb 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml @@ -25,64 +25,17 @@ tasks: - echo "Bridge task completed - ctx.rgw should now be available for s3tests" - echo "Testing RGW accessibility..." -# This uses the same execution path that s3tests uses, so it will trigger our monkey patch -- python: +# Test the monkey patch by directly executing radosgw-admin commands +# The bridge should intercept these and run them inside cephadm containers +- exec: client.0: - - | - import logging - from io import StringIO - log = logging.getLogger(__name__) - - log.info("=== Testing radosgw-admin monkey patch ===") - - # Get the remote connection (same way s3tests does it) - remote = list(ctx.cluster.only('client.0').remotes.keys())[0] - - log.info("Testing user creation via remote.run() - this should trigger monkey patch") - try: - # This should be intercepted by our monkey patch and converted to cephadm shell - result = remote.run( - args=[ - 'radosgw-admin', 'user', 'create', - '--uid=testuser', - '--display-name=Test User', - '--access-key=testkey', - '--secret-key=testsecret' - ], - stdout=StringIO() - ) - log.info("✅ SUCCESS: radosgw-admin user create executed through monkey patch!") - output = result.stdout.getvalue() if hasattr(result.stdout, 'getvalue') else str(result.stdout) - log.info(f"Command output length: {len(output)} chars") - if 'testuser' in output or 'access_key' in output: - log.info("✅ User creation output looks correct") - else: - log.warning(f"⚠️ Unexpected output: {output[:200]}...") - except Exception as e: - log.error(f"❌ FAILED: radosgw-admin user create failed: {e}") - raise - - log.info("Testing user info retrieval via remote.run()") - try: - result = remote.run( - args=[ - 'radosgw-admin', 'user', 'info', - '--uid=testuser' - ], - stdout=StringIO() - ) - log.info("✅ SUCCESS: radosgw-admin user info executed through monkey patch!") - output = result.stdout.getvalue() if hasattr(result.stdout, 'getvalue') else str(result.stdout) - if 'testuser' in output and 'access_key' in output: - log.info("✅ User info output contains expected fields") - else: - log.warning(f"⚠️ User info output: {output[:200]}...") - except Exception as e: - log.error(f"❌ FAILED: radosgw-admin user info failed: {e}") - raise - - log.info("=== ✅ Monkey patch test completed successfully! ===") - log.info("This confirms that s3tests radosgw-admin commands will work with cephadm") + - echo "=== Testing radosgw-admin monkey patch ===" + - echo "Creating test user (this should be intercepted by bridge)..." + - radosgw-admin user create --uid=testuser --display-name="Test User" --access-key=testkey --secret-key=testsecret + - echo "✅ User creation succeeded!" + - echo "Retrieving user info..." + - radosgw-admin user info --uid=testuser + - echo "✅ User info retrieval succeeded!" 
# Test S3 endpoint accessibility - exec: @@ -92,23 +45,10 @@ tasks: - echo "RGW Response: $response" - 'if echo "$response" | grep -q "ListBucketResult\|Error\|xml\|ACCESS_DENIED"; then echo "✅ RGW is responding with valid S3 response"; else echo "✗ RGW not responding correctly"; fi' -# Cleanup test user using monkey patch (another test) -- python: +# Cleanup test user (another test of the monkey patch) +- exec: client.0: - - | - import logging - log = logging.getLogger(__name__) - - log.info("=== Testing cleanup via monkey patch ===") - remote = list(ctx.cluster.only('client.0').remotes.keys())[0] - - try: - result = remote.run(args=[ - 'radosgw-admin', 'user', 'rm', - '--uid=testuser' - ]) - log.info("✅ SUCCESS: User cleanup via monkey patch worked!") - except Exception as e: - log.warning(f"⚠️ Cleanup failed (this is often expected): {e}") - - log.info("=== Bridge functionality test completed ===") + - echo "=== Testing cleanup via monkey patch ===" + - echo "Removing test user..." + - radosgw-admin user rm --uid=testuser || echo "⚠️ Cleanup failed (this is often expected if user doesn't exist)" + - echo "=== Bridge functionality test completed ===" diff --git a/qa/tasks/cephadm_s3_bridge.py b/qa/tasks/cephadm_s3_bridge.py index b22245ffe2138..2d6ea09d021ac 100644 --- a/qa/tasks/cephadm_s3_bridge.py +++ b/qa/tasks/cephadm_s3_bridge.py @@ -177,30 +177,36 @@ def discover_cephadm_rgw_endpoints(ctx): ports = service.get("ports", []) status = service.get("status_desc", "") + log.info(f"Processing service: {service_name}, hostname: {hostname}, ports: {ports}, status: {status}") + if not service_name.startswith("rgw."): + log.debug(f"Skipping non-RGW service: {service_name}") continue - if status != "running": + if "running" not in status.lower(): log.warning(f"RGW service {service_name} is not running: {status}") - continue - - if not ports: - log.warning(f"No ports found for RGW service {service_name}") - continue + # Allow non-running services through for now, s3tests might still work + log.info(f"Continuing with non-running service {service_name} - s3tests might still work") # Extract port number (ports is typically ['8080/tcp'] format) port = None - for port_spec in ports: - if isinstance(port_spec, str) and "/" in port_spec: - port = int(port_spec.split("/")[0]) - break - elif isinstance(port_spec, int): - port = port_spec - break + if ports: + for port_spec in ports: + if isinstance(port_spec, str) and "/" in port_spec: + try: + port = int(port_spec.split("/")[0]) + break + except ValueError: + continue + elif isinstance(port_spec, int): + port = port_spec + break if port is None: log.warning(f"Could not parse port for RGW service {service_name}: {ports}") - continue + # Fall back to default RGW port 8080 + port = 8080 + log.info(f"Using default port {port} for {service_name}") endpoints[service_name] = { "hostname": hostname, @@ -209,6 +215,8 @@ def discover_cephadm_rgw_endpoints(ctx): "status": status, } + log.info(f"Added endpoint: {service_name} -> {hostname}:{port} (status: {status})") + log.info(f"Discovered RGW endpoints: {endpoints}") return endpoints @@ -351,7 +359,13 @@ def task(ctx, config): raise e if not discovered_endpoints: - raise ConfigError("No RGW services found via cephadm orchestrator") + log.error("No RGW services found via cephadm orchestrator") + log.error("This usually means:") + log.error(" 1. No RGW services have been deployed yet") + log.error(" 2. RGW services haven't started yet (check with 'ceph orch ps')") + log.error(" 3. 
cephadm bridge is running before RGW deployment") + log.error("Make sure to run cephadm.apply (with RGW service) and cephadm.wait_for_service before this bridge") + raise ConfigError("No RGW services found via cephadm orchestrator - see logs for troubleshooting steps") role_endpoints = map_roles_to_endpoints(ctx, config, discovered_endpoints) From 377a31174b93c5ac2ca85e12e3c0d1433ebb402e Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 06:56:19 +0000 Subject: [PATCH 08/17] get rid of tox Signed-off-by: Joshua Blanch --- qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml | 3 --- qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml | 2 -- 2 files changed, 5 deletions(-) diff --git a/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml b/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml index 53d7dfd3844f0..732eb76745b81 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml @@ -30,9 +30,6 @@ tasks: client.0: discover_from_cephadm: true -# note: tox is needed -- tox: [client.0] - - s3tests: client.0: rgw_server: client.0 diff --git a/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml b/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml index f1424fcb6874a..ba9c8b1b16eee 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml @@ -21,8 +21,6 @@ tasks: discover_from_cephadm: true dns_name: client.0 -- tox: [client.0] - - s3tests: client.0: rgw_server: client.0 From 83c65d82431fe43373a88eb6ef0485c713f7faf8 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 14:39:38 +0000 Subject: [PATCH 09/17] bump Signed-off-by: Joshua Blanch --- .../cephadm/s3tests-bridge/basic-s3tests.yaml | 26 +++++------ .../s3tests-bridge/bridge-test-only.yaml | 34 +-------------- .../cephadm/s3tests-bridge/minimal-test.yaml | 43 ++++++++++--------- qa/tasks/cephadm_s3_bridge.py | 2 +- 4 files changed, 39 insertions(+), 66 deletions(-) diff --git a/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml b/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml index 732eb76745b81..784cdf4d8476c 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml @@ -30,16 +30,16 @@ tasks: client.0: discover_from_cephadm: true -- s3tests: - client.0: - rgw_server: client.0 - force-branch: master - conf: - DEFAULT: - is_secure: false - port: 8080 - calling_format: ordinary - fixtures: - bucket prefix: test-{random}- - # Only run 3 basic tests to verify bridge works - filter: "test_bucket_list_empty or test_bucket_create_naming_good_long_255" +# - s3tests: +# client.0: +# rgw_server: client.0 +# force-branch: master +# conf: +# DEFAULT: +# is_secure: false +# port: 8080 +# calling_format: ordinary +# fixtures: +# bucket prefix: test-{random}- +# # Only run 3 basic tests to verify bridge works +# filter: "test_bucket_list_empty or test_bucket_create_naming_good_long_255" diff --git a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml index 80c42d2339fdb..58c99d7eee03f 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml @@ -19,36 +19,6 @@ tasks: - cephadm_s3_bridge: client.0: discover_from_cephadm: true - -- exec: - client.0: - - echo "Bridge task completed - ctx.rgw should now be available for 
s3tests" - - echo "Testing RGW accessibility..." - -# Test the monkey patch by directly executing radosgw-admin commands -# The bridge should intercept these and run them inside cephadm containers -- exec: - client.0: - - echo "=== Testing radosgw-admin monkey patch ===" - - echo "Creating test user (this should be intercepted by bridge)..." - - radosgw-admin user create --uid=testuser --display-name="Test User" --access-key=testkey --secret-key=testsecret - - echo "✅ User creation succeeded!" - - echo "Retrieving user info..." - - radosgw-admin user info --uid=testuser - - echo "✅ User info retrieval succeeded!" - -# Test S3 endpoint accessibility -- exec: - client.0: - - echo "Testing S3 endpoint accessibility..." - - 'response=$(curl -s http://localhost:8080/ 2>/dev/null || echo "CONNECTION_FAILED")' - - echo "RGW Response: $response" - - 'if echo "$response" | grep -q "ListBucketResult\|Error\|xml\|ACCESS_DENIED"; then echo "✅ RGW is responding with valid S3 response"; else echo "✗ RGW not responding correctly"; fi' - -# Cleanup test user (another test of the monkey patch) -- exec: +- rgw: client.0: - - echo "=== Testing cleanup via monkey patch ===" - - echo "Removing test user..." - - radosgw-admin user rm --uid=testuser || echo "⚠️ Cleanup failed (this is often expected if user doesn't exist)" - - echo "=== Bridge functionality test completed ===" + diff --git a/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml b/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml index ba9c8b1b16eee..5b8ccef88a806 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml @@ -16,6 +16,9 @@ tasks: - cephadm.wait_for_service: service: rgw.test +- rgw: + client.0: + - cephadm_s3_bridge: client.0: discover_from_cephadm: true @@ -23,23 +26,23 @@ tasks: - s3tests: client.0: - rgw_server: client.0 - force-branch: master - conf: - DEFAULT: - is_secure: false - port: 8080 - calling_format: ordinary - fixtures: - bucket prefix: s3test-{random}- - # POC run - exclude: - - test_bucket_policy* - - test_lifecycle* - - test_encryption* - - test_multipart_upload_size_too_small - - test_cors* - - test_website* - - test_logging* - - test_versioning* - filter: "test_bucket_list_empty or test_bucket_create or test_object_write or test_object_read" +# rgw_server: client.0 +# force-branch: master +# conf: +# DEFAULT: +# is_secure: false +# port: 8080 +# calling_format: ordinary +# fixtures: +# bucket prefix: s3test-{random}- +# # POC run +# exclude: +# - test_bucket_policy* +# - test_lifecycle* +# - test_encryption* +# - test_multipart_upload_size_too_small +# - test_cors* +# - test_website* +# - test_logging* +# - test_versioning* +# filter: "test_bucket_list_empty or test_bucket_create or test_object_write or test_object_read" diff --git a/qa/tasks/cephadm_s3_bridge.py b/qa/tasks/cephadm_s3_bridge.py index 2d6ea09d021ac..8d563f9c5d104 100644 --- a/qa/tasks/cephadm_s3_bridge.py +++ b/qa/tasks/cephadm_s3_bridge.py @@ -36,7 +36,7 @@ def detect_cephadm_deployment(ctx): def patch_s3tests_radosgw_admin(ctx): """ - Monkey patch teuthology remote execution to make radosgw-admin commands + patch teuthology remote execution to make radosgw-admin commands work inside cephadm containers when running s3tests. Many teuthology tasks (eg. 
s3tests, rgw helpers) invoke radosgw-admin with From 0fc86f6a504717f3c6e84bb3139c17c0514fe089 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 15:09:45 +0000 Subject: [PATCH 10/17] bump Signed-off-by: Joshua Blanch --- qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml index 58c99d7eee03f..6f1bcdc03d395 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml @@ -19,6 +19,11 @@ tasks: - cephadm_s3_bridge: client.0: discover_from_cephadm: true +- cephadm.shell: + host.a: + - ceph osd pool ls detail + - ceph orch ls + - ceph orch ps - rgw: client.0: From 3e332fed6ac821be9ba33ff3e82fa93ec5a15280 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 15:13:30 +0000 Subject: [PATCH 11/17] bump Signed-off-by: Joshua Blanch --- qa/tasks/cephadm_s3_bridge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qa/tasks/cephadm_s3_bridge.py b/qa/tasks/cephadm_s3_bridge.py index 8d563f9c5d104..330f0a0def67d 100644 --- a/qa/tasks/cephadm_s3_bridge.py +++ b/qa/tasks/cephadm_s3_bridge.py @@ -329,6 +329,7 @@ def task(ctx, config): dns_name: rgw.example.com # optional rgw_service: rgw.myservice # optional, defaults to first found """ + log.info(f"Config received: {config}") if config is None: config = {} @@ -368,7 +369,7 @@ def task(ctx, config): raise ConfigError("No RGW services found via cephadm orchestrator - see logs for troubleshooting steps") role_endpoints = map_roles_to_endpoints(ctx, config, discovered_endpoints) - + log.info(f"Available roles: {config.keys() if config else 'No config'}") if not role_endpoints: log.error("No roles configured for RGW endpoint mapping") log.error( From 3a5df518b804451e21ae655292063e489231e2ec Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 15:16:34 +0000 Subject: [PATCH 12/17] contextlib decorator for task Signed-off-by: Joshua Blanch --- qa/tasks/cephadm_s3_bridge.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/qa/tasks/cephadm_s3_bridge.py b/qa/tasks/cephadm_s3_bridge.py index 330f0a0def67d..6c91a2c61a525 100644 --- a/qa/tasks/cephadm_s3_bridge.py +++ b/qa/tasks/cephadm_s3_bridge.py @@ -14,6 +14,7 @@ from teuthology import misc as teuthology from teuthology.exceptions import ConfigError import teuthology.orchestra.remote +import contextlib import sys import os @@ -318,6 +319,7 @@ def wait_for_rgw_accessibility(ctx, role_endpoints, timeout=60): log.info(f"RGW endpoint {role} is accessible") +@contextlib.contextmanager def task(ctx, config): """ Bridge task to make cephadm-deployed RGW compatible with s3tests. 
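The interception logic introduced in PATCH 06/17 boils down to locating 'radosgw-admin' anywhere in the argument vector, keeping whatever wrapper prefix precedes it, and splicing the cephadm shell invocation in between. A minimal sketch of that splitting step, using a hypothetical helper name and a placeholder cephadm prefix (the real code derives the image, fsid, conf and keyring paths from ctx):

def wrap_radosgw_admin(args, cephadm_prefix):
    """Wrap the radosgw-admin portion of args with a cephadm shell prefix.

    Wrapper commands in front of radosgw-admin (e.g. adjust-ulimits,
    ceph-coverage) stay on the host; only radosgw-admin and its arguments
    end up running inside the container.
    """
    if 'radosgw-admin' not in args:
        return args  # not a radosgw-admin command; leave untouched
    idx = args.index('radosgw-admin')
    return list(args[:idx]) + list(cephadm_prefix) + list(args[idx:])

# Example:
#   wrap_radosgw_admin(
#       ['adjust-ulimits', 'radosgw-admin', 'user', 'list'],
#       ['sudo', 'cephadm', 'shell', '--'],
#   )
#   -> ['adjust-ulimits', 'sudo', 'cephadm', 'shell', '--',
#       'radosgw-admin', 'user', 'list']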
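PATCH 12/17 decorates task() with contextlib.contextmanager. A function wrapped this way must yield exactly once: everything before the yield runs as setup when teuthology enters the task, and everything after it runs as teardown when the run unwinds. A minimal sketch of that shape, with hypothetical names and logging only (the bridge's real setup is the endpoint discovery and remote.run patching shown earlier in the series):

import contextlib
import logging

log = logging.getLogger(__name__)

@contextlib.contextmanager
def task(ctx, config):
    config = config or {}
    log.info("bridge example: setup, config=%s", config)
    try:
        # setup work would go here (discover endpoints, patch remote.run, ...)
        yield
    finally:
        # teardown runs once the tasks that follow this one have finished
        log.info("bridge example: teardown")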
From ee5aa3953a32e255e3bfc02c2e990fdcf2acb327 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 15:17:19 +0000 Subject: [PATCH 13/17] bump Signed-off-by: Joshua Blanch --- .../cephadm/s3tests-bridge/basic-s3tests.yaml | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml b/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml index 784cdf4d8476c..732eb76745b81 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml @@ -30,16 +30,16 @@ tasks: client.0: discover_from_cephadm: true -# - s3tests: -# client.0: -# rgw_server: client.0 -# force-branch: master -# conf: -# DEFAULT: -# is_secure: false -# port: 8080 -# calling_format: ordinary -# fixtures: -# bucket prefix: test-{random}- -# # Only run 3 basic tests to verify bridge works -# filter: "test_bucket_list_empty or test_bucket_create_naming_good_long_255" +- s3tests: + client.0: + rgw_server: client.0 + force-branch: master + conf: + DEFAULT: + is_secure: false + port: 8080 + calling_format: ordinary + fixtures: + bucket prefix: test-{random}- + # Only run 3 basic tests to verify bridge works + filter: "test_bucket_list_empty or test_bucket_create_naming_good_long_255" From e869d9eb94a6270c98c1ae97976c4aacedb7c34b Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 15:41:53 +0000 Subject: [PATCH 14/17] add back tox Signed-off-by: Joshua Blanch --- .../cephadm/s3tests-bridge/basic-s3tests.yaml | 1 + .../cephadm/s3tests-bridge/bridge-test-only.yaml | 16 ++++++++++++++-- .../cephadm/s3tests-bridge/minimal-test.yaml | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml b/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml index 732eb76745b81..3540a36025670 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/basic-s3tests.yaml @@ -30,6 +30,7 @@ tasks: client.0: discover_from_cephadm: true +- tox: [client.0] - s3tests: client.0: rgw_server: client.0 diff --git a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml index 6f1bcdc03d395..8ac134f5170ce 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml @@ -24,6 +24,18 @@ tasks: - ceph osd pool ls detail - ceph orch ls - ceph orch ps -- rgw: +- tox: [client.0] + +- s3tests: client.0: - + rgw_server: client.0 + force-branch: master + conf: + DEFAULT: + is_secure: false + port: 8080 + calling_format: ordinary + fixtures: + bucket prefix: test-{random}- + # Only run 3 basic tests to verify bridge works + filter: "test_bucket_list_empty or test_bucket_create_naming_good_long_255" diff --git a/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml b/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml index 5b8ccef88a806..cc1967540d8c7 100644 --- a/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml +++ b/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml @@ -24,6 +24,7 @@ tasks: discover_from_cephadm: true dns_name: client.0 +- tox: [client.0] - s3tests: client.0: # rgw_server: client.0 From 6a2dd0238e51d7e3afcf260090d9090871ce81ac Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 18:59:11 +0000 Subject: [PATCH 15/17] remove adjust-ulimits and ceph-coverage Signed-off-by: Joshua 
Blanch --- qa/tasks/s3tests.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py index afef388fb8460..daae50ab5d824 100644 --- a/qa/tasks/s3tests.py +++ b/qa/tasks/s3tests.py @@ -133,8 +133,6 @@ def create_users(ctx, config, s3tests_conf): # create user ctx.cluster.only(client).run( args=[ - 'adjust-ulimits', - 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'radosgw-admin', '-n', client_with_id, @@ -152,8 +150,6 @@ def create_users(ctx, config, s3tests_conf): if not ctx.dbstore_variable: ctx.cluster.only(client).run( args=[ - 'adjust-ulimits', - 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'radosgw-admin', '-n', client_with_id, @@ -172,8 +168,6 @@ def create_users(ctx, config, s3tests_conf): if section=='iam': ctx.cluster.only(client).run( args=[ - 'adjust-ulimits', - 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'radosgw-admin', '-n', client_with_id, @@ -185,8 +179,6 @@ def create_users(ctx, config, s3tests_conf): ) ctx.cluster.only(client).run( args=[ - 'adjust-ulimits', - 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'radosgw-admin', '-n', client_with_id, @@ -220,8 +212,6 @@ def create_users(ctx, config, s3tests_conf): client_with_id = daemon_type + '.' + client_id ctx.cluster.only(client).run( args=[ - 'adjust-ulimits', - 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'radosgw-admin', '-n', client_with_id, From 00209f19d7ca44527133d91c0ee1e94cd8635e07 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 8 Aug 2025 20:31:33 +0000 Subject: [PATCH 16/17] removed coverage script from s3test Signed-off-by: Joshua Blanch --- qa/tasks/s3tests.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py index daae50ab5d824..c56545ab0385b 100644 --- a/qa/tasks/s3tests.py +++ b/qa/tasks/s3tests.py @@ -133,7 +133,6 @@ def create_users(ctx, config, s3tests_conf): # create user ctx.cluster.only(client).run( args=[ - '{tdir}/archive/coverage'.format(tdir=testdir), 'radosgw-admin', '-n', client_with_id, 'user', 'create', @@ -150,7 +149,6 @@ def create_users(ctx, config, s3tests_conf): if not ctx.dbstore_variable: ctx.cluster.only(client).run( args=[ - '{tdir}/archive/coverage'.format(tdir=testdir), 'radosgw-admin', '-n', client_with_id, 'mfa', 'create', @@ -168,7 +166,6 @@ def create_users(ctx, config, s3tests_conf): if section=='iam': ctx.cluster.only(client).run( args=[ - '{tdir}/archive/coverage'.format(tdir=testdir), 'radosgw-admin', '-n', client_with_id, 'caps', 'add', @@ -179,7 +176,6 @@ def create_users(ctx, config, s3tests_conf): ) ctx.cluster.only(client).run( args=[ - '{tdir}/archive/coverage'.format(tdir=testdir), 'radosgw-admin', '-n', client_with_id, 'caps', 'add', @@ -212,7 +208,6 @@ def create_users(ctx, config, s3tests_conf): client_with_id = daemon_type + '.' 
+ client_id ctx.cluster.only(client).run( args=[ - '{tdir}/archive/coverage'.format(tdir=testdir), 'radosgw-admin', '-n', client_with_id, 'user', 'rm', From 887bd0a3d30339ad1e6a5ce74e9c42566b991126 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Sat, 9 Aug 2025 05:03:23 +0000 Subject: [PATCH 17/17] delete other tests Signed-off-by: Joshua Blanch --- .../s3tests-bridge/bridge-test-only.yaml | 41 ---------------- .../cephadm/s3tests-bridge/minimal-test.yaml | 49 ------------------- 2 files changed, 90 deletions(-) delete mode 100644 qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml delete mode 100644 qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml diff --git a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml b/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml deleted file mode 100644 index 8ac134f5170ce..0000000000000 --- a/qa/suites/orch/cephadm/s3tests-bridge/bridge-test-only.yaml +++ /dev/null @@ -1,41 +0,0 @@ -roles: -- [host.a, mon.a, mgr.a, osd.0, osd.1, osd.2, client.0] - -tasks: -- cephadm: - -- cephadm.apply: - specs: - - service_type: rgw - service_id: test - placement: - host_pattern: "*" - spec: - rgw_frontend_port: 8080 - -- cephadm.wait_for_service: - service: rgw.test - -- cephadm_s3_bridge: - client.0: - discover_from_cephadm: true -- cephadm.shell: - host.a: - - ceph osd pool ls detail - - ceph orch ls - - ceph orch ps -- tox: [client.0] - -- s3tests: - client.0: - rgw_server: client.0 - force-branch: master - conf: - DEFAULT: - is_secure: false - port: 8080 - calling_format: ordinary - fixtures: - bucket prefix: test-{random}- - # Only run 3 basic tests to verify bridge works - filter: "test_bucket_list_empty or test_bucket_create_naming_good_long_255" diff --git a/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml b/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml deleted file mode 100644 index cc1967540d8c7..0000000000000 --- a/qa/suites/orch/cephadm/s3tests-bridge/minimal-test.yaml +++ /dev/null @@ -1,49 +0,0 @@ -roles: -- [host.a, mon.a, mgr.a, osd.0, osd.1, osd.2, client.0] - -tasks: -- cephadm: - -- cephadm.apply: - specs: - - service_type: rgw - service_id: test - placement: - host_pattern: "*" - spec: - rgw_frontend_port: 8080 - -- cephadm.wait_for_service: - service: rgw.test - -- rgw: - client.0: - -- cephadm_s3_bridge: - client.0: - discover_from_cephadm: true - dns_name: client.0 - -- tox: [client.0] -- s3tests: - client.0: -# rgw_server: client.0 -# force-branch: master -# conf: -# DEFAULT: -# is_secure: false -# port: 8080 -# calling_format: ordinary -# fixtures: -# bucket prefix: s3test-{random}- -# # POC run -# exclude: -# - test_bucket_policy* -# - test_lifecycle* -# - test_encryption* -# - test_multipart_upload_size_too_small -# - test_cors* -# - test_website* -# - test_logging* -# - test_versioning* -# filter: "test_bucket_list_empty or test_bucket_create or test_object_write or test_object_read"
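With the adjust-ulimits/ceph-coverage wrappers dropped in PATCHes 15 and 16, s3tests.py now issues bare radosgw-admin commands, so 'radosgw-admin' sits at the front of the argument list and is matched directly by the bridge's interception. A hypothetical helper (not part of the series) sketching the shape of those calls:

def user_create_args(client_with_id, uid, display_name, access_key, secret_key):
    # Bare invocation, no coverage wrapper: the bridge can rewrite this into
    # a `cephadm shell -- radosgw-admin ...` call before it reaches the host.
    return [
        'radosgw-admin',
        '-n', client_with_id,
        'user', 'create',
        '--uid', uid,
        '--display-name', display_name,
        '--access-key', access_key,
        '--secret-key', secret_key,
    ]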