Add database and file restore tools

tools/restore_databases.sh: Script to restore PostgreSQL and Qdrant
backups from encrypted backup files.

tools/restore_files.py: Python script to restore Fernet-encrypted
file backups.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Daniel O'Connell 2025-12-19 18:38:25 +01:00
parent 116d0362a2
commit a1444efaac
2 changed files with 390 additions and 0 deletions

208
tools/restore_databases.sh Executable file
View File

@ -0,0 +1,208 @@
#!/bin/bash
# Restore Postgres and Qdrant databases from S3 backups
# Usage: ./restore_databases.sh [DATE]
# Example: ./restore_databases.sh 20251219
#
# Environment:
#   BACKUP_ENCRYPTION_KEY  (required) passphrase used to decrypt the backups
#   S3_BACKUP_BUCKET       bucket name        (default: equistamp-memory-backup)
#   S3_BACKUP_PREFIX       key prefix         (default: Daniel); "/databases" is appended
#   S3_BACKUP_REGION       AWS region         (default: eu-central-1)
#   POSTGRES_HOST/PORT/USER/DB  target database (defaults: localhost/5432/kb/kb)
#   QDRANT_URL             target Qdrant      (default: http://localhost:6333)
set -euo pipefail

# Configuration - read from environment or use defaults
BUCKET="${S3_BACKUP_BUCKET:-equistamp-memory-backup}"
PREFIX="${S3_BACKUP_PREFIX:-Daniel}/databases"
REGION="${S3_BACKUP_REGION:-eu-central-1}"
PASSWORD="${BACKUP_ENCRYPTION_KEY:?BACKUP_ENCRYPTION_KEY not set}"

# Target services - adjust for your environment
POSTGRES_HOST="${POSTGRES_HOST:-localhost}"
POSTGRES_PORT="${POSTGRES_PORT:-5432}"
POSTGRES_USER="${POSTGRES_USER:-kb}"
POSTGRES_DB="${POSTGRES_DB:-kb}"
QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"

# Date to restore (default: list available backups)
DATE="${1:-}"

# Temp directory for downloads; removed on every exit path.
TEMP_DIR=$(mktemp -d)
# Single quotes defer expansion until the trap fires, and the inner double
# quotes keep a temp path containing whitespace as one argument to rm.
trap 'rm -rf -- "${TEMP_DIR}"' EXIT
# Print a timestamped informational message to stdout.
# Arguments: message words (joined with spaces)
log() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] %s\n' "${stamp}" "$*"
}
# Print a timestamped error message to stderr.
# Arguments: message words (joined with spaces)
error() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] ERROR: %s\n' "${stamp}" "$*" >&2
}
# List the ten most recent PostgreSQL and Qdrant backups found under the
# configured S3 prefix.
# Globals:   BUCKET, PREFIX, REGION (read)
# Outputs:   headings and object names on stdout
# Returns:   0 on success (even when one kind has no backups),
#            1 when the S3 listing itself fails
list_backups() {
  # List once and filter twice; both sections see the same snapshot of S3.
  local listing
  if ! listing="$(aws s3 ls "s3://${BUCKET}/${PREFIX}/" --region "${REGION}")"; then
    error "Failed to list s3://${BUCKET}/${PREFIX}/ (no objects or access error)"
    return 1
  fi

  # awk is used for both filtering and truncation: unlike grep it exits 0 when
  # nothing matches, and unlike head it consumes all input, so neither can
  # fail the pipeline under `set -o pipefail`.
  log "Available PostgreSQL backups:"
  awk -v pat="postgres-" 'index($0, pat) { print " " $4 }' <<<"${listing}" \
    | sort -r \
    | awk 'NR <= 10'
  echo ""
  log "Available Qdrant backups:"
  awk -v pat="qdrant-" 'index($0, pat) { print " " $4 }' <<<"${listing}" \
    | sort -r \
    | awk 'NR <= 10'
}
# Download, decrypt and load the PostgreSQL dump for a given date.
# Globals:   BUCKET, PREFIX, REGION, PASSWORD, TEMP_DIR, POSTGRES_HOST,
#            POSTGRES_PORT, POSTGRES_USER, POSTGRES_DB (read);
#            PGPASSWORD (read, optional)
# Arguments: $1 - date as used in the object name (e.g. 20251219)
# Returns:   0 when psql exits successfully, 1 on any failure
restore_postgres() {
  local date=$1
  local s3_path="s3://${BUCKET}/${PREFIX}/postgres-${date}.sql.gz.enc"
  local sql_file="${TEMP_DIR}/postgres_restore.sql"

  log "Checking if Postgres backup exists: ${s3_path}"
  if ! aws s3 ls "${s3_path}" --region "${REGION}" >/dev/null 2>&1; then
    error "Postgres backup not found: ${s3_path}"
    return 1
  fi

  log "Downloading and decrypting Postgres backup..."
  # The passphrase is handed to openssl through its environment rather than
  # as "pass:..." on the command line, where it would show up in `ps` output.
  if ! aws s3 cp "${s3_path}" - --region "${REGION}" \
    | BACKUP_PASSPHRASE="${PASSWORD}" \
      openssl enc -d -aes-256-cbc -pbkdf2 -pass env:BACKUP_PASSPHRASE \
    | gunzip > "${sql_file}"; then
    error "Failed to download/decrypt Postgres backup"
    return 1
  fi
  log "Postgres backup decrypted ($(du -h "${sql_file}" | cut -f1))"

  # Check if we can connect to postgres
  log "Testing PostgreSQL connection..."
  if ! PGPASSWORD="${PGPASSWORD:-}" psql -h "${POSTGRES_HOST}" -p "${POSTGRES_PORT}" \
    -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c "SELECT 1" >/dev/null 2>&1; then
    error "Cannot connect to PostgreSQL at ${POSTGRES_HOST}:${POSTGRES_PORT}"
    error "Set PGPASSWORD environment variable or check connection settings"
    # TEMP_DIR is removed when the script exits, so move the decrypted dump
    # somewhere that survives before telling the user where to find it.
    local kept_file=""
    if kept_file="$(mktemp "${TMPDIR:-/tmp}/postgres_restore.sql.XXXXXX")" \
      && mv -f -- "${sql_file}" "${kept_file}"; then
      log "SQL dump saved to: ${kept_file}"
      log "You can restore manually with: psql -h ${POSTGRES_HOST} -p ${POSTGRES_PORT} -U ${POSTGRES_USER} -d ${POSTGRES_DB} < ${kept_file}"
    else
      if [[ -n "${kept_file}" ]]; then
        rm -f -- "${kept_file}"
      fi
      error "Could not preserve the decrypted SQL dump for manual restore"
    fi
    return 1
  fi

  log "Restoring to PostgreSQL..."
  # NOTE(review): without ON_ERROR_STOP psql's exit status mostly reflects
  # connection/fatal errors, not individual failed statements - confirm that
  # is the intended definition of "success" here.
  if PGPASSWORD="${PGPASSWORD:-}" psql -h "${POSTGRES_HOST}" -p "${POSTGRES_PORT}" \
    -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" < "${sql_file}"; then
    log "PostgreSQL restore completed successfully"
    return 0
  else
    error "PostgreSQL restore failed (some errors may be expected for existing objects)"
    return 1
  fi
}
# Download, decrypt and upload the Qdrant snapshot for a given date, then ask
# Qdrant to recover from it.
# Globals:   BUCKET, PREFIX, REGION, PASSWORD, TEMP_DIR, QDRANT_URL (read)
# Arguments: $1 - date as used in the object name (e.g. 20251219)
# Returns:   0 when recovery succeeded, or when the upload succeeded but the
#            snapshot name could not be parsed; 1 on any other failure
restore_qdrant() {
  local date=$1
  local s3_path="s3://${BUCKET}/${PREFIX}/qdrant-${date}.snapshot.enc"
  local snapshot_file="${TEMP_DIR}/qdrant_restore.snapshot"

  log "Checking if Qdrant backup exists: ${s3_path}"
  if ! aws s3 ls "${s3_path}" --region "${REGION}" >/dev/null 2>&1; then
    error "Qdrant backup not found: ${s3_path}"
    return 1
  fi

  log "Downloading and decrypting Qdrant backup..."
  # The passphrase is handed to openssl through its environment rather than
  # as "pass:..." on the command line, where it would show up in `ps` output.
  if ! aws s3 cp "${s3_path}" - --region "${REGION}" \
    | BACKUP_PASSPHRASE="${PASSWORD}" \
      openssl enc -d -aes-256-cbc -pbkdf2 -pass env:BACKUP_PASSPHRASE \
      > "${snapshot_file}"; then
    error "Failed to download/decrypt Qdrant backup"
    return 1
  fi
  log "Qdrant backup decrypted ($(du -h "${snapshot_file}" | cut -f1))"

  # Check if Qdrant is reachable
  log "Testing Qdrant connection..."
  if ! curl -sf "${QDRANT_URL}/readyz" >/dev/null 2>&1; then
    error "Cannot connect to Qdrant at ${QDRANT_URL}"
    # TEMP_DIR is removed when the script exits, so move the decrypted
    # snapshot somewhere that survives before pointing the user at it.
    local kept_file=""
    if kept_file="$(mktemp "${TMPDIR:-/tmp}/qdrant_restore.snapshot.XXXXXX")" \
      && mv -f -- "${snapshot_file}" "${kept_file}"; then
      log "Snapshot saved to: ${kept_file}"
      log "You can restore manually by uploading to Qdrant"
    else
      if [[ -n "${kept_file}" ]]; then
        rm -f -- "${kept_file}"
      fi
      error "Could not preserve the decrypted snapshot for manual restore"
    fi
    return 1
  fi

  log "Uploading snapshot to Qdrant..."
  # NOTE(review): confirm that the deployed Qdrant version exposes
  # /snapshots/upload and /snapshots/recover at the root (not per collection).
  # No explicit Content-Type: -F makes curl emit the multipart header together
  # with its boundary. -S keeps curl's error text so the failure message below
  # is not empty.
  local upload_response
  if ! upload_response=$(curl -sSf -X POST "${QDRANT_URL}/snapshots/upload?wait=true" \
    -F "snapshot=@${snapshot_file}" 2>&1); then
    error "Failed to upload snapshot to Qdrant: ${upload_response}"
    return 1
  fi
  log "Snapshot uploaded, recovering..."

  # Extract the snapshot filename from the response
  local snapshot_name
  if command -v jq >/dev/null 2>&1; then
    snapshot_name=$(printf '%s\n' "${upload_response}" | jq -r '.result.name // empty')
  else
    # Best-effort fallback when jq is not installed.
    snapshot_name=$(printf '%s\n' "${upload_response}" | grep -o '"name":"[^"]*"' | cut -d'"' -f4)
  fi

  if [[ -z "${snapshot_name}" ]]; then
    log "Upload response: ${upload_response}"
    log "Snapshot uploaded but could not extract name. Check Qdrant manually."
    # NOTE(review): this reports success although no recovery was triggered;
    # kept as-is because it appears deliberate - confirm.
    return 0
  fi

  log "Recovering from snapshot: ${snapshot_name}"
  if curl -sf -X PUT "${QDRANT_URL}/snapshots/recover" \
    -H "Content-Type: application/json" \
    -d "{\"location\": \"file:///qdrant/snapshots/${snapshot_name}\"}" >/dev/null; then
    log "Qdrant restore completed successfully"
    return 0
  else
    error "Qdrant recovery failed"
    return 1
  fi
}
# Drive the whole restore: with no DATE, list backups and print usage;
# otherwise restore PostgreSQL, then Qdrant, and exit with an overall status.
# Globals:   DATE (read)
# Exits:     0 when listing or when both restores succeed, 1 otherwise
main() {
  if [[ -z "${DATE}" ]]; then
    log "No date specified. Listing available backups..."
    echo ""
    list_backups
    echo ""
    log "Usage: $0 <DATE>"
    log "Example: $0 20251219"
    exit 0
  fi

  local rule="=========================================="
  local pg_status=0
  local qd_status=0

  log "Starting database restore for date: ${DATE}"
  echo ""

  # Each restore runs regardless of the other's outcome.
  printf '%s\n' "${rule}" " PostgreSQL Restore" "${rule}"
  restore_postgres "${DATE}" || pg_status=1
  echo ""

  printf '%s\n' "${rule}" " Qdrant Restore" "${rule}"
  restore_qdrant "${DATE}" || qd_status=1
  echo ""

  printf '%s\n' "${rule}" " Summary" "${rule}"
  case "${pg_status}${qd_status}" in
    00)
      log "All database restores completed successfully"
      exit 0
      ;;
    11)
      error "All database restores failed"
      exit 1
      ;;
    *)
      error "Some restores failed (Postgres: ${pg_status}, Qdrant: ${qd_status})"
      exit 1
      ;;
  esac
}
# Entry point. DATE is captured from the first positional argument at the top
# of the script, so main itself is called without arguments.
main

182
tools/restore_files.py Executable file
View File

@ -0,0 +1,182 @@
#!/usr/bin/env python3
"""Restore Fernet-encrypted file backups from S3.
Usage:
# List available backups
python restore_files.py --list
# Restore a specific backup
python restore_files.py emails.tar.gz.enc --output ./restored_files
# Restore from local file
python restore_files.py /path/to/backup.tar.gz.enc --output ./restored_files
"""
import argparse
import base64
import hashlib
import io
import os
import sys
import tarfile
from pathlib import Path
import boto3
from cryptography.fernet import Fernet
def get_cipher(password: str) -> Fernet:
    """Build the Fernet cipher for ``password``.

    The key is the URL-safe base64 encoding of the SHA-256 digest of the
    UTF-8 encoded password (same derivation as backup.py).
    """
    digest = hashlib.sha256(password.encode()).digest()
    return Fernet(base64.urlsafe_b64encode(digest))
def list_backups(bucket: str, prefix: str, region: str) -> list[str]:
    """List available encrypted file backups in S3.

    Args:
        bucket: S3 bucket name.
        prefix: Key prefix to list under.
        region: AWS region used to create the S3 client.

    Returns:
        One pre-formatted line (name, size in MB, last-modified time) per
        object whose key ends in ``.tar.gz.enc``. An empty list is returned
        when nothing matches or when the listing fails; failures are reported
        on stderr.
    """
    s3 = boto3.client("s3", region_name=region)
    backups: list[str] = []
    try:
        # A single list_objects_v2 call returns at most 1000 keys; paginate so
        # large prefixes are listed completely.
        paginator = s3.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            for obj in page.get("Contents", []):
                key = obj["Key"]
                if not key.endswith(".tar.gz.enc"):
                    continue
                name = key.split("/")[-1]
                size_mb = obj["Size"] / (1024 * 1024)
                modified = obj["LastModified"].strftime("%Y-%m-%d %H:%M:%S")
                backups.append(f"{name:40} {size_mb:8.2f} MB {modified}")
    except Exception as e:
        print(f"Error listing S3 bucket: {e}", file=sys.stderr)
        return []
    return backups
def download_from_s3(
bucket: str, prefix: str, filename: str, region: str
) -> bytes | None:
"""Download encrypted backup from S3."""
s3 = boto3.client("s3", region_name=region)
key = f"{prefix}/{filename}"
try:
print(f"Downloading s3://{bucket}/{key}...")
response = s3.get_object(Bucket=bucket, Key=key)
return response["Body"].read()
except Exception as e:
print(f"Error downloading from S3: {e}", file=sys.stderr)
return None
def decrypt_and_extract(
    encrypted_data: bytes, password: str, output_dir: Path
) -> bool:
    """Decrypt a Fernet-encrypted gzip tarball and extract it.

    Args:
        encrypted_data: The encrypted backup contents.
        password: Password the cipher is derived from (see ``get_cipher``).
        output_dir: Directory to extract into; created if missing.

    Returns:
        True when decryption and extraction both succeed, False otherwise.
        Failures are reported on stderr. An archive containing a member that
        would land outside ``output_dir`` is treated as an extraction failure.
    """
    cipher = get_cipher(password)
    try:
        print("Decrypting...")
        decrypted = cipher.decrypt(encrypted_data)
    except Exception as e:
        print(f"Decryption failed: {e}", file=sys.stderr)
        print("Check that BACKUP_ENCRYPTION_KEY is correct", file=sys.stderr)
        return False
    print(f"Decrypted {len(decrypted)} bytes")

    try:
        print(f"Extracting to {output_dir}...")
        output_dir.mkdir(parents=True, exist_ok=True)
        tar_buffer = io.BytesIO(decrypted)
        with tarfile.open(fileobj=tar_buffer, mode="r:gz") as tar:
            if hasattr(tarfile, "data_filter"):
                # The "data" filter rejects members that escape the
                # destination (absolute paths, "..", unsafe links) and avoids
                # the deprecated unfiltered extraction.
                tar.extractall(output_dir, filter="data")
            else:
                # Older interpreters without extraction filters: at least
                # refuse member names that resolve outside the destination.
                root = output_dir.resolve()
                for member in tar.getmembers():
                    target = (root / member.name).resolve()
                    if target != root and root not in target.parents:
                        raise tarfile.TarError(
                            f"Unsafe path in archive: {member.name}"
                        )
                tar.extractall(output_dir)
        print("Extraction complete")
        return True
    except Exception as e:
        print(f"Extraction failed: {e}", file=sys.stderr)
        return False
def main():
    """Command-line entry point: list backups or restore one.

    Reads the encryption password from ``BACKUP_ENCRYPTION_KEY`` (required
    unless ``--list`` is given). The positional ``backup`` argument is read
    as a local file when such a path exists, otherwise it is downloaded from
    S3 under ``--prefix``. Exits with status 1 on any failure.
    """
    parser = argparse.ArgumentParser(
        description="Restore Fernet-encrypted file backups from S3"
    )
    parser.add_argument(
        "backup",
        nargs="?",
        help="Backup filename (e.g., emails.tar.gz.enc) or local path",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=Path,
        default=Path("./restored_files"),
        help="Output directory for restored files",
    )
    parser.add_argument("--list", "-l", action="store_true", help="List available backups")
    parser.add_argument(
        "--bucket",
        default=os.getenv("S3_BACKUP_BUCKET", "equistamp-memory-backup"),
        help="S3 bucket name",
    )
    parser.add_argument(
        "--prefix",
        default=os.getenv("S3_BACKUP_PREFIX", "Daniel"),
        help="S3 prefix",
    )
    parser.add_argument(
        "--region",
        default=os.getenv("S3_BACKUP_REGION", "eu-central-1"),
        help="AWS region",
    )
    args = parser.parse_args()

    # Get encryption key (listing does not decrypt anything, so it is optional there)
    password = os.getenv("BACKUP_ENCRYPTION_KEY")
    if not password and not args.list:
        print("Error: BACKUP_ENCRYPTION_KEY environment variable not set", file=sys.stderr)
        sys.exit(1)

    # List mode
    if args.list:
        print(f"Available backups in s3://{args.bucket}/{args.prefix}/:\n")
        backups = list_backups(args.bucket, args.prefix, args.region)
        if backups:
            print("Name Size Modified")
            print("-" * 70)
            for backup in backups:
                print(backup)
        else:
            print("No encrypted backups found")
        return

    # Restore mode
    if not args.backup:
        parser.print_help()
        sys.exit(1)

    # Check if it's a local file or S3 key
    local_path = Path(args.backup)
    if local_path.is_dir():
        # read_bytes() on a directory would raise; report it cleanly instead.
        print(f"Error: {local_path} is a directory, not a backup file", file=sys.stderr)
        sys.exit(1)
    if local_path.exists():
        print(f"Reading local file: {local_path}")
        encrypted_data = local_path.read_bytes()
    else:
        # Download from S3 (download_from_s3 reports its own errors)
        encrypted_data = download_from_s3(
            args.bucket, args.prefix, args.backup, args.region
        )
        if encrypted_data is None:
            sys.exit(1)

    if not encrypted_data:
        # Say why we are stopping rather than exiting silently.
        print(f"Error: backup {args.backup} is empty", file=sys.stderr)
        sys.exit(1)

    # Decrypt and extract
    if decrypt_and_extract(encrypted_data, password, args.output):
        print(f"\nFiles restored to: {args.output.absolute()}")
    else:
        sys.exit(1)


if __name__ == "__main__":
    main()