Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
45 changes: 45 additions & 0 deletions qa/suites/orch/cephadm/clyso/upgrade/1-start-ces.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Cluster layout: one inner list per test node. The `host.<x>` entry names the
# node so that later tasks (e.g. cephadm.shell) can target it; the remaining
# entries are the daemons/clients placed on that node.
roles:
  # Node 1: bootstrap node with the monitor.
  - - host.a
    - mon.a
    - mgr.a
    - osd.0
  # Node 2: also carries the client role.
  - - host.b
    - osd.1
    - mgr.b
    - client.0
  # Node 3: named host.c so every node has a unique host role (host.b was
  # previously declared twice, making the host name ambiguous).
  - - host.c
    - osd.2
    - mgr.c

tasks:
  # Tooling needed by later steps is installed with plain dnf through pexec
  # on every node (avoids CEPH RPM installation).
  - pexec:
      all:
        - sudo dnf install s3cmd curl jq -y

  # Bootstrap the cluster with cephadm using the task defaults; the container
  # image is expected to come from an `overrides` fragment.
  - cephadm:

  # Sanity-check the freshly bootstrapped cluster and record its version.
  - cephadm.shell:
      host.a:
        - ceph status
        - ceph orch ps
        - ceph version
        - echo "Starting with CES version (from override YAML)"

# Extra block devices requested per node when provisioning on OpenStack.
# NOTE(review): size is presumably in GB - confirm against teuthology docs.
openstack:
  - volumes:
      count: 4
      size: 20

overrides:
  ceph:
    conf:
      osd:
        osd shutdown pgref assert: true
    # Only cluster-log lines matching these patterns are considered when the
    # run is scanned for failures ...
    log-only-match:
      - CEPHADM_
    # ... and these cephadm health codes are tolerated, since they are
    # expected to show up while daemons are being redeployed.
    log-ignorelist:
      - CEPHADM_DAEMON_PLACE_FAIL
      - CEPHADM_FAILED_DAEMON
      - CEPHADM_STRAY_DAEMON
      - CEPHADM_AGENT_DOWN
43 changes: 43 additions & 0 deletions qa/suites/orch/cephadm/clyso/upgrade/2-create-baseline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
tasks:
  # Deploy an RGW service on every host, listening on port 8080.
  - cephadm.apply:
      specs:
        - service_type: rgw
          service_id: foo
          placement:
            host_pattern: "*"
          spec:
            rgw_frontend_port: 8080

  # The orchestrator names the service <service_type>.<service_id>, which is
  # why the service to wait for is rgw.foo rather than just foo.
  - cephadm.wait_for_service:
      service: rgw.foo

  # Record the cluster state once the baseline service is up.
  - cephadm.shell:
      host.a:
        - ceph status

# - cephadm.shell:
# host.a:
# - ceph_test_rgw_obj

# - workunit:
# clients:
# client.0:
# - rgw/test_rgw_obj.sh

# client.0:
# - |
# cat > /tmp/s3cfg << 'EOF'
# [default]
# access_key = ceskey
# secret_key = cessecret
# host_base = host.b:8080
# host_bucket = host.b:8080
# use_https = False
# signature_v2 = True
# EOF
# - s3cmd -c /tmp/s3cfg mb s3://ces-baseline-bucket
# - echo "CES S3 baseline data - must survive downgrade!" > /tmp/ces-s3-baseline.txt
# - s3cmd -c /tmp/s3cfg put /tmp/ces-s3-baseline.txt s3://ces-baseline-bucket/ces-baseline-s3-object.txt
# - s3cmd -c /tmp/s3cfg ls s3://ces-baseline-bucket/
# - echo "CES S3 baseline data created successfully"
149 changes: 149 additions & 0 deletions qa/suites/orch/cephadm/clyso/upgrade/3-downgrade-upstream.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
tasks:
  # Move the running cluster from the CES image to upstream Ceph v18.2.7 and
  # block until the orchestrator has finished and every daemon reports the
  # target version.
  - cephadm.shell:
      host.a:
        - echo "PRE-DOWNGRADE CES VERSION:"
        - ceph version
        - ceph orch ps

        - echo "Starting downgrade from CES to upstream CEPH v18.2.7..."
        - ceph orch upgrade start --image quay.io/ceph/ceph:v18.2.7
        # Give the orchestrator a moment to register the upgrade before polling.
        - sleep 30

        # Write the monitor script to /tmp and run it. The heredoc delimiter is
        # quoted ('EOF') so nothing in the script is expanded while writing it.
        - |
          cat > /tmp/upgrade_monitor.sh << 'EOF'
          #!/bin/bash

          # Upgrade monitoring script for cephadm upgrade tests.
          #
          # Polls `ceph orch upgrade status` and `ceph versions` until the
          # orchestrator reports no upgrade in progress and all daemons run a
          # single version, then verifies that this version is the target.
          #
          # Arguments:
          #   $1 target_version     version expected after the operation, e.g.
          #                         "v18.2.7"; a leading "v" is ignored and the
          #                         rest must appear in the `ceph versions` string
          #   $2 operation          "upgrade" or "downgrade" (informational)
          #   $3 base_image_name    label for the starting image (default: base)
          #   $4 target_image_name  label for the target image (default: target)
          #   $5 timeout_seconds    give up after this long (default: 2400)
          #
          # Exit status: 0 once the target version is reached; 1 on bad usage,
          # reported upgrade error, wrong final version, or timeout.

          set -e

          TARGET_VERSION="$1"
          OPERATION="$2"  # "upgrade" or "downgrade"
          BASE_IMAGE_NAME="${3:-base}"
          TARGET_IMAGE_NAME="${4:-target}"
          TIMEOUT="${5:-2400}"

          if [ -z "$TARGET_VERSION" ] || [ -z "$OPERATION" ]; then
            echo "Usage: $0 <target_version> <upgrade|downgrade> [base_image_name] [target_image_name] [timeout_seconds]"
            exit 1
          fi

          # Image tags carry a leading "v" ("v18.2.7") while daemon version
          # strings do not ("ceph version 18.2.7 (...)"), so compare without it.
          target_number="${TARGET_VERSION#v}"

          echo "=== CEPH Upgrade Monitor Started ==="
          echo "Base image: $BASE_IMAGE_NAME"
          echo "Target image: $TARGET_IMAGE_NAME"
          echo "Operation: $OPERATION to $TARGET_VERSION"
          echo "Timeout: ${TIMEOUT}s"
          echo "Start time: $(date)"

          echo "=== Capturing Baseline Version ==="
          ceph versions
          baseline_version=$(ceph versions --format json | jq -r ".overall | keys[0]")
          echo "Baseline version: $baseline_version"

          echo "=== Starting Upgrade Monitoring ==="
          start_time=$(date +%s)

          while true; do
            current_time=$(date +%s)
            elapsed=$((current_time - start_time))

            echo ""
            echo "=== Upgrade Status (Elapsed: ${elapsed}s) ==="
            echo "Time: $(date)"

            # The mgr is restarted as part of the operation, so a query may fail
            # transiently. Running the queries as an `if` condition keeps
            # `set -e` from aborting the test; the next iteration retries.
            # Each command is queried once per iteration so that all checks
            # below look at the same snapshot.
            if upgrade_status=$(ceph orch upgrade status --format json) &&
               versions_json=$(ceph versions --format json); then
              echo "--- Orchestrator Upgrade Status ---"
              echo "$upgrade_status"

              echo "--- Daemon Versions ---"
              echo "$versions_json" | jq .

              in_progress=$(echo "$upgrade_status" | jq -r ".in_progress")
              version_count=$(echo "$versions_json" | jq ".overall | length")

              echo "Upgrade in progress: $in_progress"
              echo "Number of different versions running: $version_count"

              if [ "$in_progress" = "false" ] && [ "$version_count" -eq 1 ]; then
                current_version=$(echo "$versions_json" | jq -r ".overall | keys[0]")
                echo "All daemons now on: $current_version"

                # Nothing is in progress and the cluster is uniform: this is
                # the final state. It only counts as success if it is actually
                # the requested version; otherwise the operation never ran or
                # ended on the wrong release.
                case "$current_version" in
                  *"$target_number"*)
                    ;;
                  *)
                    echo ""
                    echo "=== ERROR: Target Version Not Reached ==="
                    echo "Expected: $TARGET_VERSION"
                    echo "Running: $current_version"
                    echo "$upgrade_status"
                    exit 1
                    ;;
                esac

                echo ""
                if [ "$current_version" != "$baseline_version" ]; then
                  echo "=== SUCCESS: Upgrade Completed ==="
                  echo "From: $baseline_version"
                  echo "To: $current_version"
                else
                  echo "=== SUCCESS: Already on Target Version ==="
                  echo "Current version: $current_version"
                fi
                echo "Base image: $BASE_IMAGE_NAME"
                echo "Target image: $TARGET_IMAGE_NAME"
                echo "Total time: ${elapsed}s"
                echo "End time: $(date)"
                break
              fi

              echo "Upgrade still in progress or daemons on mixed versions"

              if echo "$upgrade_status" | jq -r ".message" | grep -q -i "error\|fail"; then
                echo ""
                echo "=== ERROR: Upgrade Failed ==="
                echo "Upgrade status shows error or failure"
                echo "$upgrade_status"
                exit 1
              fi
            else
              echo "WARNING: could not query cluster state (mgr may be restarting); retrying"
            fi

            if [ "$elapsed" -ge "$TIMEOUT" ]; then
              echo ""
              echo "=== ERROR: Upgrade Timeout ==="
              echo "Upgrade did not complete within $TIMEOUT seconds"
              echo "Current status:"
              echo "${upgrade_status:-<unavailable>}"
              # Best effort only: the exit status below must stay 1.
              ceph versions || true
              exit 1
            fi

            echo "Waiting 60 seconds before next check..."
            sleep 60
          done

          echo ""
          echo "=== Final Verification ==="
          ceph health detail
          ceph orch ps
          ceph status

          echo ""
          echo "=== Upgrade Monitor Completed Successfully ==="
          EOF

          chmod +x /tmp/upgrade_monitor.sh
          /tmp/upgrade_monitor.sh "v18.2.7" "downgrade" "CES" "Upstream v18.2.7" "1800"

        - echo "POST-DOWNGRADE UPSTREAM VERSION:"
        - ceph version
        - ceph orch ps
        - ceph -s


overrides:
  ceph:
    # cephadm health codes tolerated while daemons are being replaced.
    log-ignorelist:
      - CEPHADM_STRAY_DAEMON
      - CEPHADM_FAILED_DAEMON
      - CEPHADM_AGENT_DOWN
      - CEPHADM_DAEMON_PLACE_FAIL
    # Restrict cluster-log failure matching to cephadm health codes.
    log-only-match:
      - CEPHADM_
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Cross-distro test: Start with CES, upgrade to Upstream, downgrade back to CES
#
# base_image / target_image are the container images at the two ends of the
# scenario; the *_name values are human-readable labels for them.
base_image: 'harbor.clyso.com/ces/ceph/ceph:ces-v25.03.2-rc.4'
target_image: 'quay.io/ceph/ceph:v18.2.7'
base_image_name: 'CES-v25.03.2-rc.4'
target_image_name: 'Upstream-18.2.7'

overrides:
  ceph:
    # Image the cluster is bootstrapped from; same value as base_image above.
    image: 'harbor.clyso.com/ces/ceph/ceph:ces-v25.03.2-rc.4'
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Cross-distro test: Start with Upstream, upgrade to CES, downgrade back to Upstream
#
# base_image / target_image are the container images at the two ends of the
# scenario; the *_name values are human-readable labels for them.
base_image: 'quay.io/ceph/ceph:v18.2.7'
target_image: 'harbor.clyso.com/ces/ceph/ceph:ces-v25.03.2-rc.4'
base_image_name: 'Upstream-18.2.7'
target_image_name: 'CES-v25.03.2-rc.4'

overrides:
  ceph:
    # Image the cluster is bootstrapped from; same value as base_image above.
    image: 'quay.io/ceph/ceph:v18.2.7'
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Two-node layout: one inner list per test node. The `host.<x>` entry names
# the node; the remaining entries are the daemons/clients placed on it.
roles:
  # Node 1: bootstrap node with the monitor.
  - - host.a
    - mon.a
    - mgr.a
    - osd.0
  # Node 2: standby mgr, second OSD and the client.
  - - host.b
    - mgr.b
    - osd.1
    - client.0

tasks:
  # Tooling needed by later steps is installed with plain dnf through pexec
  # on every node (avoids CEPH RPM installation).
  - pexec:
      all:
        - sudo dnf install s3cmd curl jq -y

  # Bootstrap the cluster with cephadm using the task defaults; the container
  # image is expected to come from an `overrides` fragment.
  - cephadm:

  # Dump orchestrator, daemon, version, health and device state right after
  # bootstrap so the starting point is visible in the test log.
  - cephadm.shell:
      host.a:
        - echo "=== BOOTSTRAP COMPLETE ==="
        - ceph orch status
        - ceph orch ps
        - ceph version
        - ceph -s
        - ceph orch device ls

# Extra block devices requested per node when provisioning on OpenStack.
# NOTE(review): size is presumably in GB - confirm against teuthology docs.
openstack:
  - volumes:
      count: 4
      size: 20

overrides:
  ceph:
    conf:
      osd:
        osd shutdown pgref assert: true
    # Only cluster-log lines matching these patterns are considered when the
    # run is scanned for failures ...
    log-only-match:
      - CEPHADM_
    # ... and these cephadm health codes are tolerated, since they are
    # expected to show up while daemons are being redeployed.
    log-ignorelist:
      - CEPHADM_DAEMON_PLACE_FAIL
      - CEPHADM_FAILED_DAEMON
      - CEPHADM_STRAY_DAEMON
      - CEPHADM_AGENT_DOWN
Loading
Loading