diff --git a/tools/restore_databases.sh b/tools/restore_databases.sh
new file mode 100755
index 0000000..d80b099
--- /dev/null
+++ b/tools/restore_databases.sh
@@ -0,0 +1,208 @@
+#!/bin/bash
+# Restore Postgres and Qdrant databases from S3 backups
+# Usage: ./restore_databases.sh [DATE]
+# Example: ./restore_databases.sh 20251219
+
+set -euo pipefail
+
+# Configuration - read from environment or use defaults
+BUCKET="${S3_BACKUP_BUCKET:-equistamp-memory-backup}"
+PREFIX="${S3_BACKUP_PREFIX:-Daniel}/databases"
+REGION="${S3_BACKUP_REGION:-eu-central-1}"
+PASSWORD="${BACKUP_ENCRYPTION_KEY:?BACKUP_ENCRYPTION_KEY not set}"
+
+# Target services - adjust for your environment
+POSTGRES_HOST="${POSTGRES_HOST:-localhost}"
+POSTGRES_PORT="${POSTGRES_PORT:-5432}"
+POSTGRES_USER="${POSTGRES_USER:-kb}"
+POSTGRES_DB="${POSTGRES_DB:-kb}"
+QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"
+
+# Date to restore (default: list available backups)
+DATE="${1:-}"
+
+# Temp directory for downloads
+TEMP_DIR=$(mktemp -d)
+trap "rm -rf ${TEMP_DIR}" EXIT
+
+log() {
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
+}
+
+error() {
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $*" >&2
+}
+
+# List available backups
+list_backups() {
+    log "Available PostgreSQL backups:"
+    aws s3 ls "s3://${BUCKET}/${PREFIX}/" --region "${REGION}" | grep "postgres-" | awk '{print "  " $4}' | sort -r | head -10
+
+    echo ""
+    log "Available Qdrant backups:"
+    aws s3 ls "s3://${BUCKET}/${PREFIX}/" --region "${REGION}" | grep "qdrant-" | awk '{print "  " $4}' | sort -r | head -10
+}
+
+# Restore PostgreSQL
+restore_postgres() {
+    local date=$1
+    local s3_path="s3://${BUCKET}/${PREFIX}/postgres-${date}.sql.gz.enc"
+    local sql_file="${TEMP_DIR}/postgres_restore.sql"
+
+    log "Checking if Postgres backup exists: ${s3_path}"
+    if ! aws s3 ls "${s3_path}" --region "${REGION}" >/dev/null 2>&1; then
+        error "Postgres backup not found: ${s3_path}"
+        return 1
+    fi
+
+    log "Downloading and decrypting Postgres backup..."
+    if ! aws s3 cp "${s3_path}" - --region "${REGION}" | \
+        openssl enc -d -aes-256-cbc -pbkdf2 -pass "pass:${PASSWORD}" | \
+        gunzip > "${sql_file}"; then
+        error "Failed to download/decrypt Postgres backup"
+        return 1
+    fi
+
+    log "Postgres backup decrypted ($(du -h "${sql_file}" | cut -f1))"
+
+    # Check if we can connect to postgres
+    log "Testing PostgreSQL connection..."
+    if ! PGPASSWORD="${PGPASSWORD:-}" psql -h "${POSTGRES_HOST}" -p "${POSTGRES_PORT}" -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c "SELECT 1" >/dev/null 2>&1; then
+        error "Cannot connect to PostgreSQL at ${POSTGRES_HOST}:${POSTGRES_PORT}"
+        error "Set PGPASSWORD environment variable or check connection settings"
+        log "SQL dump saved to: ${sql_file}"
+        log "You can restore manually with: psql -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -d ${POSTGRES_DB} < ${sql_file}"
+        return 1
+    fi
+
+    log "Restoring to PostgreSQL..."
+    if PGPASSWORD="${PGPASSWORD:-}" psql -h "${POSTGRES_HOST}" -p "${POSTGRES_PORT}" -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" < "${sql_file}"; then
+        log "PostgreSQL restore completed successfully"
+        return 0
+    else
+        error "PostgreSQL restore failed (some errors may be expected for existing objects)"
+        return 1
+    fi
+}
+
+# Restore Qdrant
+restore_qdrant() {
+    local date=$1
+    local s3_path="s3://${BUCKET}/${PREFIX}/qdrant-${date}.snapshot.enc"
+    local snapshot_file="${TEMP_DIR}/qdrant_restore.snapshot"
+
+    log "Checking if Qdrant backup exists: ${s3_path}"
+    if ! aws s3 ls "${s3_path}" --region "${REGION}" >/dev/null 2>&1; then
aws s3 ls "${s3_path}" --region "${REGION}" >/dev/null 2>&1; then + error "Qdrant backup not found: ${s3_path}" + return 1 + fi + + log "Downloading and decrypting Qdrant backup..." + if ! aws s3 cp "${s3_path}" - --region "${REGION}" | \ + openssl enc -d -aes-256-cbc -pbkdf2 -pass "pass:${PASSWORD}" \ + > "${snapshot_file}"; then + error "Failed to download/decrypt Qdrant backup" + return 1 + fi + + log "Qdrant backup decrypted ($(du -h "${snapshot_file}" | cut -f1))" + + # Check if Qdrant is reachable + log "Testing Qdrant connection..." + if ! curl -sf "${QDRANT_URL}/readyz" >/dev/null 2>&1; then + error "Cannot connect to Qdrant at ${QDRANT_URL}" + log "Snapshot saved to: ${snapshot_file}" + log "You can restore manually by uploading to Qdrant" + return 1 + fi + + log "Uploading snapshot to Qdrant..." + local upload_response + if ! upload_response=$(curl -sf -X POST "${QDRANT_URL}/snapshots/upload?wait=true" \ + -H "Content-Type: multipart/form-data" \ + -F "snapshot=@${snapshot_file}" 2>&1); then + error "Failed to upload snapshot to Qdrant: ${upload_response}" + return 1 + fi + + log "Snapshot uploaded, recovering..." + + # Extract the snapshot filename from the response + local snapshot_name + if command -v jq >/dev/null 2>&1; then + snapshot_name=$(echo "${upload_response}" | jq -r '.result.name // empty') + else + snapshot_name=$(echo "${upload_response}" | grep -o '"name":"[^"]*"' | cut -d'"' -f4) + fi + + if [ -z "${snapshot_name}" ]; then + log "Upload response: ${upload_response}" + log "Snapshot uploaded but could not extract name. Check Qdrant manually." + return 0 + fi + + log "Recovering from snapshot: ${snapshot_name}" + if curl -sf -X PUT "${QDRANT_URL}/snapshots/recover" \ + -H "Content-Type: application/json" \ + -d "{\"location\": \"file:///qdrant/snapshots/${snapshot_name}\"}" >/dev/null; then + log "Qdrant restore completed successfully" + return 0 + else + error "Qdrant recovery failed" + return 1 + fi +} + +# Main +main() { + if [ -z "${DATE}" ]; then + log "No date specified. Listing available backups..." + echo "" + list_backups + echo "" + log "Usage: $0 " + log "Example: $0 20251219" + exit 0 + fi + + log "Starting database restore for date: ${DATE}" + echo "" + + local postgres_result=0 + local qdrant_result=0 + + # Restore Postgres + echo "==========================================" + echo " PostgreSQL Restore" + echo "==========================================" + if ! restore_postgres "${DATE}"; then + postgres_result=1 + fi + echo "" + + # Restore Qdrant + echo "==========================================" + echo " Qdrant Restore" + echo "==========================================" + if ! restore_qdrant "${DATE}"; then + qdrant_result=1 + fi + echo "" + + # Summary + echo "==========================================" + echo " Summary" + echo "==========================================" + if [ $postgres_result -eq 0 ] && [ $qdrant_result -eq 0 ]; then + log "All database restores completed successfully" + exit 0 + elif [ $postgres_result -ne 0 ] && [ $qdrant_result -ne 0 ]; then + error "All database restores failed" + exit 1 + else + error "Some restores failed (Postgres: ${postgres_result}, Qdrant: ${qdrant_result})" + exit 1 + fi +} + +main diff --git a/tools/restore_files.py b/tools/restore_files.py new file mode 100755 index 0000000..0bbbac1 --- /dev/null +++ b/tools/restore_files.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +"""Restore Fernet-encrypted file backups from S3. 
diff --git a/tools/restore_files.py b/tools/restore_files.py
new file mode 100755
index 0000000..0bbbac1
--- /dev/null
+++ b/tools/restore_files.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+"""Restore Fernet-encrypted file backups from S3.
+
+Usage:
+    # List available backups
+    python restore_files.py --list
+
+    # Restore a specific backup
+    python restore_files.py emails.tar.gz.enc --output ./restored_files
+
+    # Restore from local file
+    python restore_files.py /path/to/backup.tar.gz.enc --output ./restored_files
+"""
+
+import argparse
+import base64
+import hashlib
+import io
+import os
+import sys
+import tarfile
+from pathlib import Path
+
+import boto3
+from cryptography.fernet import Fernet
+
+
+def get_cipher(password: str) -> Fernet:
+    """Create Fernet cipher from password (same derivation as backup.py)."""
+    key_bytes = hashlib.sha256(password.encode()).digest()
+    key = base64.urlsafe_b64encode(key_bytes)
+    return Fernet(key)
+
+
+def list_backups(bucket: str, prefix: str, region: str) -> list[str]:
+    """List available encrypted file backups in S3."""
+    s3 = boto3.client("s3", region_name=region)
+
+    try:
+        response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
+    except Exception as e:
+        print(f"Error listing S3 bucket: {e}", file=sys.stderr)
+        return []
+
+    backups = []
+    for obj in response.get("Contents", []):
+        key = obj["Key"]
+        if key.endswith(".tar.gz.enc"):
+            name = key.split("/")[-1]
+            size_mb = obj["Size"] / (1024 * 1024)
+            modified = obj["LastModified"].strftime("%Y-%m-%d %H:%M:%S")
+            backups.append(f"{name:40} {size_mb:8.2f} MB {modified}")
+
+    return backups
+
+
+def download_from_s3(
+    bucket: str, prefix: str, filename: str, region: str
+) -> bytes | None:
+    """Download encrypted backup from S3."""
+    s3 = boto3.client("s3", region_name=region)
+    key = f"{prefix}/{filename}"
+
+    try:
+        print(f"Downloading s3://{bucket}/{key}...")
+        response = s3.get_object(Bucket=bucket, Key=key)
+        return response["Body"].read()
+    except Exception as e:
+        print(f"Error downloading from S3: {e}", file=sys.stderr)
+        return None
+
+
+def decrypt_and_extract(
+    encrypted_data: bytes, password: str, output_dir: Path
+) -> bool:
+    """Decrypt Fernet-encrypted tarball and extract contents."""
+    cipher = get_cipher(password)
+
+    try:
+        print("Decrypting...")
+        decrypted = cipher.decrypt(encrypted_data)
+    except Exception as e:
+        print(f"Decryption failed: {e}", file=sys.stderr)
+        print("Check that BACKUP_ENCRYPTION_KEY is correct", file=sys.stderr)
+        return False
+
+    print(f"Decrypted {len(decrypted)} bytes")
+
+    try:
+        print(f"Extracting to {output_dir}...")
+        output_dir.mkdir(parents=True, exist_ok=True)
+        tar_buffer = io.BytesIO(decrypted)
+        with tarfile.open(fileobj=tar_buffer, mode="r:gz") as tar:
+            tar.extractall(output_dir)
+        print("Extraction complete")
+        return True
+    except Exception as e:
+        print(f"Extraction failed: {e}", file=sys.stderr)
+        return False
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Restore Fernet-encrypted file backups from S3"
+    )
+    parser.add_argument(
+        "backup",
+        nargs="?",
+        help="Backup filename (e.g., emails.tar.gz.enc) or local path",
+    )
+    parser.add_argument(
+        "--output",
+        "-o",
+        type=Path,
+        default=Path("./restored_files"),
+        help="Output directory for restored files",
+    )
+    parser.add_argument("--list", "-l", action="store_true", help="List available backups")
+    parser.add_argument(
+        "--bucket",
+        default=os.getenv("S3_BACKUP_BUCKET", "equistamp-memory-backup"),
+        help="S3 bucket name",
+    )
+    parser.add_argument(
+        "--prefix",
+        default=os.getenv("S3_BACKUP_PREFIX", "Daniel"),
+        help="S3 prefix",
+    )
+    parser.add_argument(
+        "--region",
+        default=os.getenv("S3_BACKUP_REGION", "eu-central-1"),
+        help="AWS region",
+    )
+
+    args = parser.parse_args()
+
+    # Get encryption key
+    password = os.getenv("BACKUP_ENCRYPTION_KEY")
+    if not password and not args.list:
+        print("Error: BACKUP_ENCRYPTION_KEY environment variable not set", file=sys.stderr)
+        sys.exit(1)
+
+    # List mode
+    if args.list:
+        print(f"Available backups in s3://{args.bucket}/{args.prefix}/:\n")
+        backups = list_backups(args.bucket, args.prefix, args.region)
+        if backups:
+            print("Name                                     Size     Modified")
+            print("-" * 70)
+            for backup in backups:
+                print(backup)
+        else:
+            print("No encrypted backups found")
+        return
+
+    # Restore mode
+    if not args.backup:
+        parser.print_help()
+        sys.exit(1)
+
+    # Check if it's a local file or S3 key
+    local_path = Path(args.backup)
+    if local_path.exists():
+        print(f"Reading local file: {local_path}")
+        encrypted_data = local_path.read_bytes()
+    else:
+        # Download from S3
+        encrypted_data = download_from_s3(
+            args.bucket, args.prefix, args.backup, args.region
+        )
+        if not encrypted_data:
+            sys.exit(1)
+
+    # Decrypt and extract
+    if decrypt_and_extract(encrypted_data, password, args.output):
+        print(f"\nFiles restored to: {args.output.absolute()}")
+    else:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
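
Since get_cipher derives the Fernet key as urlsafe-base64(SHA-256(passphrase)), mirroring backup.py, a quick local round trip can confirm a restore is compatible with that derivation. A minimal sketch, assuming restore_files.py is importable from the working directory; the passphrase and output path are throwaway values for the check:

    # Round-trip sanity check (illustrative): encrypt a tiny tar.gz with the same
    # key derivation, then recover it with decrypt_and_extract().
    import base64, hashlib, io, tarfile
    from pathlib import Path
    from cryptography.fernet import Fernet
    from restore_files import decrypt_and_extract

    password = "test-passphrase"  # throwaway key for the check
    key = base64.urlsafe_b64encode(hashlib.sha256(password.encode()).digest())

    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode="w:gz") as tar:
        data = b"hello"
        info = tarfile.TarInfo(name="hello.txt")
        info.size = len(data)
        tar.addfile(info, io.BytesIO(data))

    encrypted = Fernet(key).encrypt(buf.getvalue())
    out_dir = Path("/tmp/restore_check")  # illustrative output path
    assert decrypt_and_extract(encrypted, password, out_dir)
    assert (out_dir / "hello.txt").read_bytes() == data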