From 53a1a4587549a35c9694b0689f07ceb1d5a27e11 Mon Sep 17 00:00:00 2001
From: "a.b.christie"
Date: Mon, 26 Jan 2026 19:01:13 +0000
Subject: [PATCH 1/5] fix: Fix recovery (s3 fuse)

---
 sql-recovery/docker-entrypoint.sh |  6 +++---
 sql-recovery/recovery.py          | 45 ++++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/sql-recovery/docker-entrypoint.sh b/sql-recovery/docker-entrypoint.sh
index 439453c..3de62ac 100755
--- a/sql-recovery/docker-entrypoint.sh
+++ b/sql-recovery/docker-entrypoint.sh
@@ -60,13 +60,13 @@ if [ -v BACKUP_VOLUME_IS_S3 ]; then
         S3FS_EXTRA_OPTIONS+=" -o ${BACKUP_VOLUME_S3_REQUEST_STYLE}"
     fi
 
-    # Create the target directory ('/backup')
+    # Create the target directory ('/recovery')
     # and then invoke s3fs
-    mkdir -p /backup
+    mkdir -p /recovery
     S3FS_CMD_OPTIONS="-o passwd_file=/tmp/.passwd-s3fs ${S3FS_EXTRA_OPTIONS}"
     echo "--] s3fs AWS_BUCKET_NAME=${AWS_BUCKET_NAME}"
     echo "--] s3fs S3FS_CMD_OPTIONS=${S3FS_CMD_OPTIONS}"
-    s3fs ${AWS_BUCKET_NAME} /backup ${S3FS_CMD_OPTIONS}
+    s3fs ${AWS_BUCKET_NAME} /recovery ${S3FS_CMD_OPTIONS}
 
     # And then wait (to avoid weird /backup root directories not existing)
     echo "--] Sleeping for 4 seconds..."
diff --git a/sql-recovery/recovery.py b/sql-recovery/recovery.py
index 9dba772..c71d395 100755
--- a/sql-recovery/recovery.py
+++ b/sql-recovery/recovery.py
@@ -83,6 +83,30 @@
 If you set this to any value the recovery will not stop if there are
 errors. The default is to stop the recovery if there are errors.
 
+Variables for (AWS) S3 backup (synchronisation).
+If the AWS_BUCKET_NAME is set the code assumes that the bucket is mapped
+to the expected backup mount point (RECOVERY_ROOT_DIR) by the
+'docker-entrypoint.sh' script, which is achieved using 's3fs'.
+
+If you are using S3 you must not mount a backup directory at '/recovery'
+as the underlying 'docker-entrypoint.sh' script will do this for you and
+any existing backup directory may cause an error or at the very least confusion.
+ +- AWS_BUCKET_NAME + + If set, it's the name of an AWS S3 bucket (default ''). + If set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY must be set. + +- AWS_ACCESS_KEY_ID + + The access key ID for write-access to the named bucket. + This must be set if AWS_BUCKET_NAME is set. + +- AWS_SECRET_ACCESS_KEY + + The secret access key. + This must be set if AWS_BUCKET_NAME is set. + Variables for rclone bucket support. If the USE_RCLONE variable is set the code assumes that the user wants to synchronise the recovery directory with a remote bucket using rclone @@ -126,6 +150,7 @@ ERROR_MISSING_RCLONE_BUCKET_AND_PATH = 6 ERROR_MISSING_RCLONE_VARIABLE = 7 ERROR_RCLONE_FAILED = 8 +ERROR_INCOMPLETE_AWS = 9 # Supported database flavours... FLAVOUR_POSTGRESQL = 'postgresql' @@ -155,6 +180,7 @@ PGADMINPASS = os.environ.get('PGADMINPASS', '-') HOME = os.environ['HOME'] # AWS/rclone S3 material... +AWS_BUCKET_NAME = os.environ.get('AWS_BUCKET_NAME', '') AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID', '') AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY', '') AWS_DEFAULT_REGION = os.environ.get('AWS_DEFAULT_REGION', '') @@ -268,6 +294,22 @@ def error(error_no): sys.exit(0) +if AWS_BUCKET_NAME: + print('# AWS_BUCKET_NAME = %s' % AWS_BUCKET_NAME) + +# If AWS_BUCKET_NAME is defined +# we must have all or nothing with regard to AWS info +AWS_VAR_COUNT = 0 +if AWS_BUCKET_NAME: + AWS_VAR_COUNT += 1 +if AWS_ACCESS_KEY_ID: + AWS_VAR_COUNT += 1 +if AWS_SECRET_ACCESS_KEY: + AWS_VAR_COUNT += 1 +if AWS_BUCKET_NAME and AWS_VAR_COUNT != 3: + print('--] If specifying AWS_BUCKET_NAME you must define all the AWS variables') + error(ERROR_INCOMPLETE_AWS) + RECOVERY_START_TIME = datetime.now() print('--] Hello [%s]' % RECOVERY_START_TIME) @@ -438,7 +480,8 @@ def error(error_no): elif LATEST_BACKUP_MAXIMUM_AGE_H and not LATEST_BACKUP: # Given maximum age but there appear to be no backups! 
print('--] There is no "latest" backup!') - print('--] I was told to expect an age of no more than %s hours.') + print('--] I was told to expect an age of no more than %s hours.' + % LATEST_BACKUP_MAXIMUM_AGE_H) print('--] You need to check that backups are running.') error(ERROR_NO_LATEST) From ddf176cf30e141e19f38b591c5caae91b3b14e54 Mon Sep 17 00:00:00 2001 From: "a.b.christie" Date: Tue, 27 Jan 2026 07:46:55 +0000 Subject: [PATCH 2/5] fix: Fix "current user cannot be dropped" on recovery --- sql-recovery/recovery.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sql-recovery/recovery.py b/sql-recovery/recovery.py index c71d395..55488aa 100755 --- a/sql-recovery/recovery.py +++ b/sql-recovery/recovery.py @@ -173,6 +173,7 @@ DATABASE_EXPECTED_COUNT = os.environ.get('DATABASE_EXPECTED_COUNT', '') # A specific database? DATABASE = os.environ.get('DATABASE', '') +RECOVERY_DATABASE_ROOT_USER = os.environ.get('RECOVERY_DATABASE_ROOT_USER', 'postgres') # Extract configuration from the environment. # Postgres material... PGHOST = os.environ.get('PGHOST', '') @@ -544,6 +545,18 @@ def error(error_no): write_termination_message('Unpack failed') sys.exit(0) +# Filter - Remove actions on the root (postgres) user, which will otherwise +# cause the recovery to fail with "current user cannot be dropped". +# We only need to do this if the recovery is from a dumpall. 
+if DATABASE: + TARGET_FILE = "%s/%s.sql" % (RECOVERY_ROOT_DIR, DECOMPRESSED_RECOVERY_NAME) + FILTER_PG_CMD = "egrep -v '^(CREATE|DROP) ROLE %s;' %s > filtered.sql ; mv filtered.sql %s" % (RECOVERY_DATABASE_ROOT_USER, TARGET_FILE, TARGET_FILE) + print(" $", FILTER_PG_CMD) + COMPLETED_PROCESS = subprocess.run(FILTER_PG_CMD, + shell=True, + stderr=subprocess.PIPE, + check=False) + print(" $", RECOVERY_CMD) COMPLETED_PROCESS = subprocess.run(RECOVERY_CMD, shell=True, From 8281c2ca9e2d759908500dfe537bb9398c63a121 Mon Sep 17 00:00:00 2001 From: "a.b.christie" Date: Tue, 27 Jan 2026 08:00:36 +0000 Subject: [PATCH 3/5] fix: Fix recovery typo --- sql-recovery/recovery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql-recovery/recovery.py b/sql-recovery/recovery.py index 55488aa..05a2722 100755 --- a/sql-recovery/recovery.py +++ b/sql-recovery/recovery.py @@ -548,7 +548,7 @@ def error(error_no): # Filter - Remove actions on the root (postgres) user, which will otherwise # cause the recovery to fail with "current user cannot be dropped". # We only need to do this if the recovery is from a dumpall. -if DATABASE: +if not DATABASE: TARGET_FILE = "%s/%s.sql" % (RECOVERY_ROOT_DIR, DECOMPRESSED_RECOVERY_NAME) FILTER_PG_CMD = "egrep -v '^(CREATE|DROP) ROLE %s;' %s > filtered.sql ; mv filtered.sql %s" % (RECOVERY_DATABASE_ROOT_USER, TARGET_FILE, TARGET_FILE) print(" $", FILTER_PG_CMD) From ec2228f9211f437ddc049ac97787d49c86ce328d Mon Sep 17 00:00:00 2001 From: "a.b.christie" Date: Tue, 27 Jan 2026 08:50:54 +0000 Subject: [PATCH 4/5] fix: Fix for IF EXISTS --- sql-recovery/recovery.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql-recovery/recovery.py b/sql-recovery/recovery.py index 05a2722..187cfda 100755 --- a/sql-recovery/recovery.py +++ b/sql-recovery/recovery.py @@ -1,7 +1,9 @@ #!/usr/bin/env python """A simple module to recover a backup created with pg_dumpall (or pg_dump) -(as performed by our sql-backup container). 
+(as performed by our sql-backup container). Importantly,
+any pg_dumpall backup is expected to have been created with
+"--clean" and "--if-exists" options.
 
 The backup directory (BACKUP_ROOT_DIR) is expected to have been mounted
 as a volume in the container image.
@@ -550,7 +552,7 @@ def error(error_no):
 # We only need to do this if the recovery is from a dumpall.
 if not DATABASE:
     TARGET_FILE = "%s/%s.sql" % (RECOVERY_ROOT_DIR, DECOMPRESSED_RECOVERY_NAME)
-    FILTER_PG_CMD = "egrep -v '^(CREATE|DROP) ROLE %s;' %s > filtered.sql ; mv filtered.sql %s" % (RECOVERY_DATABASE_ROOT_USER, TARGET_FILE, TARGET_FILE)
+    FILTER_PG_CMD = "egrep -v '^(CREATE|DROP) ROLE IF EXISTS %s;' %s > filtered.sql ; mv filtered.sql %s" % (RECOVERY_DATABASE_ROOT_USER, TARGET_FILE, TARGET_FILE)
     print(" $", FILTER_PG_CMD)
     COMPLETED_PROCESS = subprocess.run(FILTER_PG_CMD,
                                        shell=True,

From 3e6d11585a83e0e03a1b4ef890cc919a35780196 Mon Sep 17 00:00:00 2001
From: "a.b.christie"
Date: Tue, 27 Jan 2026 09:36:52 +0000
Subject: [PATCH 5/5] fix: Fix filter command

---
 sql-recovery/recovery.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/sql-recovery/recovery.py b/sql-recovery/recovery.py
index 187cfda..5d77fea 100755
--- a/sql-recovery/recovery.py
+++ b/sql-recovery/recovery.py
@@ -205,6 +205,7 @@
 # The recovery root dir, where backups are unpacked.
 RECOVERY_ROOT_DIR = '/recovery'
 DECOMPRESSED_RECOVERY_NAME = 'recovery'
+DECOMPRESSED_FILTERED_NAME = 'filtered'
 BACKUP_FILE_PREFIX = 'backup'
 
 # Recovery commands for the various database flavours...
@@ -551,8 +552,9 @@ def error(error_no):
 # cause the recovery to fail with "current user cannot be dropped".
 # We only need to do this if the recovery is from a dumpall.
if not DATABASE: - TARGET_FILE = "%s/%s.sql" % (RECOVERY_ROOT_DIR, DECOMPRESSED_RECOVERY_NAME) - FILTER_PG_CMD = "egrep -v '^(CREATE|DROP) ROLE IF EXISTS %s;' %s > filtered.sql ; mv filtered.sql %s" % (RECOVERY_DATABASE_ROOT_USER, TARGET_FILE, TARGET_FILE) + RECOVERY_FILE = "%s/%s.sql" % (RECOVERY_ROOT_DIR, DECOMPRESSED_RECOVERY_NAME) + FILTERED_FILE = "%s/%s.sql" % (RECOVERY_ROOT_DIR, DECOMPRESSED_FILTERED_NAME) + FILTER_PG_CMD = "egrep -v '^(CREATE ROLE|DROP ROLE IF EXISTS) %s;' %s > %s ; mv %s %s" % (RECOVERY_DATABASE_ROOT_USER, RECOVERY_FILE, FILTERED_FILE, FILTERED_FILE, RECOVERY_FILE) print(" $", FILTER_PG_CMD) COMPLETED_PROCESS = subprocess.run(FILTER_PG_CMD, shell=True, @@ -626,6 +628,16 @@ def error(error_no): print('--] OK - Found the expected number of databases (%s).' % DATABASE_EXPECTED_COUNT) +# Remove any files we may have created. +RECOVERY_FILE = "%s/%s.sql" % (RECOVERY_ROOT_DIR, DECOMPRESSED_RECOVERY_NAME) +FILTERED_FILE = "%s/%s.sql" % (RECOVERY_ROOT_DIR, DECOMPRESSED_FILTERED_NAME) +if os.path.isfile(RECOVERY_FILE): + print('--] Removing %s' % RECOVERY_FILE) + os.remove(RECOVERY_FILE) +if os.path.isfile(FILTERED_FILE): + print('--] Removing %s' % FILTERED_FILE) + os.remove(FILTERED_FILE) + # Success if we get here write_termination_message()