
Backup

Complete backup and disaster recovery strategy for GitPulse.

Overview

graph LR
    subgraph "Production"
        DB[("PostgreSQL")]
        FILES["Files"]
    end

    subgraph "Backups"
        LOCAL["Local backups"]
        REMOTE["Remote storage"]
    end

    DB --> |pg_dump| LOCAL
    FILES --> |tar| LOCAL
    LOCAL --> |rsync/S3| REMOTE

What to Back Up

Component Priority Frequency Retention
PostgreSQL database Critical Daily 30 days
Configuration files High On change 90 days
Docker volumes Medium Weekly 14 days
Logs Low Daily 7 days

Automatic Backup

Backup Script

Bash
#!/bin/bash
# scripts/backup.sh
#
# Creates a timestamped backup set:
#   1. PostgreSQL custom-format dump (pg_dump -Fc)
#   2. Tarball of configuration files
#   3. Best-effort copy of the Redis RDB snapshot
# then prunes backups older than RETENTION_DAYS and verifies the new
# dump with pg_restore --list.
#
# Must be run from the directory containing docker-compose.yml.

set -euo pipefail

# === Configuration ===
readonly BACKUP_DIR="/home/gitpulse/backups"
readonly RETENTION_DAYS=30
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
readonly TIMESTAMP
readonly BACKUP_NAME="gitpulse_backup_${TIMESTAMP}"

# Colors for output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly NC='\033[0m'

# Log an informational message with a timestamp prefix.
log() {
    echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"
}

# Print an error to stderr and abort the backup run.
error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
    exit 1
}

# === Create directory ===
mkdir -p "${BACKUP_DIR}"

# === 1. PostgreSQL Backup ===
log "Backing up PostgreSQL database..."
docker compose exec -T postgres \
    pg_dump -U gitpulse -Fc gitpulse \
    > "${BACKUP_DIR}/${BACKUP_NAME}.dump" \
    || error "Database backup failed"

# Path is quoted so du keeps working even if BACKUP_DIR contains spaces.
log "Database backed up: ${BACKUP_NAME}.dump ($(du -h "${BACKUP_DIR}/${BACKUP_NAME}.dump" | cut -f1))"

# === 2. Configuration Backup ===
log "Backing up configuration files..."
tar -czf "${BACKUP_DIR}/${BACKUP_NAME}_config.tar.gz" \
    --exclude='.git' \
    --exclude='__pycache__' \
    --exclude='.venv' \
    .env Caddyfile docker-compose.yml \
    || error "Configuration backup failed"

# === 3. Redis Backup (optional, best-effort) ===
log "Backing up Redis..."
docker compose exec -T redis redis-cli BGSAVE
# Wait for the background save to actually finish instead of a fixed
# sleep: poll rdb_bgsave_in_progress (max ~30 s).
for _ in $(seq 1 30); do
    in_progress=$(docker compose exec -T redis redis-cli INFO persistence \
        | tr -d '\r' | awk -F: '/^rdb_bgsave_in_progress/ {print $2}')
    [ "${in_progress}" = "0" ] && break
    sleep 1
done
docker cp "$(docker compose ps -q redis):/data/dump.rdb" \
    "${BACKUP_DIR}/${BACKUP_NAME}_redis.rdb" 2>/dev/null || true

# === 4. Clean old backups ===
log "Cleaning backups older than ${RETENTION_DAYS} days..."
find "${BACKUP_DIR}" -name "gitpulse_backup_*" -mtime "+${RETENTION_DAYS}" -delete

# === 5. Verification ===
# Run pg_restore inside the postgres container (dump fed via stdin) so
# the host does not need the PostgreSQL client tools installed -- the
# dump itself was also taken via the container.
log "Verifying backup..."
docker compose exec -T postgres pg_restore --list \
    < "${BACKUP_DIR}/${BACKUP_NAME}.dump" > /dev/null \
    || error "Backup verification failed"

# === Summary ===
log "Backup completed successfully!"
echo "========================================"
echo "Backup files:"
ls -lh "${BACKUP_DIR}/${BACKUP_NAME}"*
echo "========================================"

Cron Job

Bash
# Daily backup at 2:00
0 2 * * * /home/gitpulse/gitpulse/scripts/backup.sh >> /var/log/gitpulse-backup.log 2>&1

# Weekly full backup on Sunday at 3:00
# NOTE(review): backup.sh as shown above does not parse a --full flag,
# so the argument is currently ignored -- confirm whether a full-backup
# mode is implemented elsewhere.
0 3 * * 0 /home/gitpulse/gitpulse/scripts/backup.sh --full >> /var/log/gitpulse-backup.log 2>&1

Remote Backups

S3 Compatible Storage

Bash
#!/bin/bash
# scripts/backup-to-s3.sh
#
# Uploads the most recent database dump produced by scripts/backup.sh
# to S3-compatible object storage. Remote retention is handled by an
# S3 lifecycle policy, not by this script.

set -euo pipefail

# Configuration
# BACKUP_DIR was previously undefined in this script, making the cp
# path undefined; default it here (overridable via the environment).
BACKUP_DIR="${BACKUP_DIR:-/home/gitpulse/backups}"
S3_BUCKET="s3://gitpulse-backups"
S3_ENDPOINT="https://s3.example.com"

# Pick the newest dump; fail loudly if none exists.
LATEST_DUMP=$(ls -t "${BACKUP_DIR}"/*.dump 2>/dev/null | head -1)
[ -n "${LATEST_DUMP}" ] || { echo "No backup dumps found in ${BACKUP_DIR}" >&2; exit 1; }

# Upload
aws s3 cp "${LATEST_DUMP}" \
    "${S3_BUCKET}/database/" \
    --endpoint-url "${S3_ENDPOINT}"

# Retention is handled by S3 lifecycle policy

Rsync to Remote Server

Bash
#!/bin/bash
# scripts/sync-backups.sh
#
# Mirrors the local backup directory to a remote server over SSH.

set -euo pipefail

# BACKUP_DIR was previously undefined here: "${BACKUP_DIR}/" would
# expand to "/" and rsync --delete the entire root to the remote.
# Define it (overridable via the environment) and run under set -u.
BACKUP_DIR="${BACKUP_DIR:-/home/gitpulse/backups}"
REMOTE_HOST="backup.example.com"
REMOTE_DIR="/backups/gitpulse"

# NOTE: --delete mirrors local retention to the remote, so remote
# copies disappear when local ones are pruned. Drop --delete if the
# remote should keep an independent (longer) retention window.
rsync -avz --delete \
    "${BACKUP_DIR}/" \
    "${REMOTE_HOST}:${REMOTE_DIR}/"

Recovery (Restore)

Database Recovery

Bash
#!/bin/bash
# scripts/restore.sh
#
# Restores a PostgreSQL custom-format dump into a staging database
# (gitpulse_restore), then swaps it in place of the live "gitpulse"
# database.
#
# Usage: ./scripts/restore.sh <backup_file.dump>

set -euo pipefail

BACKUP_FILE="${1:-}"

if [ -z "${BACKUP_FILE}" ]; then
    echo "Usage: $0 <backup_file>"
    echo "Available backups:"
    ls -la /home/gitpulse/backups/*.dump
    exit 1
fi

[ -f "${BACKUP_FILE}" ] || { echo "Backup file not found: ${BACKUP_FILE}" >&2; exit 1; }

echo "WARNING: This will delete the existing database!"
read -r -p "Continue? (yes/no): " confirm

if [ "${confirm}" != "yes" ]; then
    echo "Cancelled."
    exit 0
fi

# Run an admin statement against the maintenance DB ("postgres").
# The original connected to the default DB (= the username, "gitpulse")
# and then tried to DROP it -- PostgreSQL refuses to drop the database
# you are connected to.
psql_admin() {
    docker compose exec -T postgres psql -U gitpulse -d postgres -c "$1"
}

# 1. Stop application
echo "Stopping application..."
docker compose stop api worker

# 2. Drop and recreate staging database
echo "Preparing database..."
psql_admin "DROP DATABASE IF EXISTS gitpulse_restore;"
psql_admin "CREATE DATABASE gitpulse_restore;"

# 3. Restore into the staging database
echo "Restoring from ${BACKUP_FILE}..."
docker compose exec -T postgres \
    pg_restore -U gitpulse -d gitpulse_restore < "${BACKUP_FILE}"

# 4. Swap databases.
# Each statement runs as its own psql -c call: a multi-statement -c
# string is executed as one implicit transaction, and DROP DATABASE
# cannot run inside a transaction block.
echo "Swapping databases..."
psql_admin "SELECT pg_terminate_backend(pid) FROM pg_stat_activity
            WHERE datname = 'gitpulse' AND pid <> pg_backend_pid();"
psql_admin "DROP DATABASE IF EXISTS gitpulse;"
psql_admin "ALTER DATABASE gitpulse_restore RENAME TO gitpulse;"

# 5. Start application
echo "Starting application..."
docker compose start api worker

echo "[OK] Recovery completed!"

Point-in-Time Recovery (PITR)

For critical deployments with WAL archiving:

YAML
# docker-compose.yml
# Enables WAL archiving so a physical base backup can be replayed to a
# point in time (archive_mode + archive_command copy completed WAL
# segments into the wal_archive volume).
# NOTE(review): the "wal_archive" named volume must also be declared
# under the compose file's top-level `volumes:` key -- confirm it is
# declared outside this snippet, otherwise `docker compose` rejects it.
services:
  postgres:
    environment:
      POSTGRES_INITDB_ARGS: "--data-checksums"
    command: >
      postgres
      -c archive_mode=on
      -c archive_command='cp %p /var/lib/postgresql/wal_archive/%f'
      -c wal_level=replica
    volumes:
      - wal_archive:/var/lib/postgresql/wal_archive
Bash
# PITR recovery
# NOTE: pg_restore has no --target-time option -- PITR is not done with
# a logical dump. Restore a physical base backup into the data
# directory, then let PostgreSQL replay archived WAL up to the target:
cat >> "${PGDATA}/postgresql.conf" <<'EOF'
restore_command = 'cp /var/lib/postgresql/wal_archive/%f %p'
recovery_target_time = '2024-11-15 10:30:00'
EOF
touch "${PGDATA}/recovery.signal"   # enter targeted recovery on startup
pg_ctl start -D "${PGDATA}"

Backup Testing

Automatic Test

Bash
#!/bin/bash
# scripts/test-backup.sh
#
# End-to-end backup test: create a fresh backup, restore it into a
# throwaway database, and compare a row count against production.

set -euo pipefail

# Always drop the scratch database, even when the test fails mid-way
# (the original leaked backup_test on the failure path's exit 1).
cleanup() {
    docker compose exec -T postgres \
        psql -U gitpulse -c "DROP DATABASE IF EXISTS backup_test;" || true
}
trap cleanup EXIT

# 1. Create backup
./scripts/backup.sh

# 2. Create test database
docker compose exec -T postgres \
    psql -U gitpulse -c "CREATE DATABASE backup_test;"

# 3. Restore to test database
LATEST_BACKUP=$(ls -t /home/gitpulse/backups/*.dump | head -1)
docker compose exec -T postgres \
    pg_restore -U gitpulse -d backup_test < "${LATEST_BACKUP}"

# 4. Verification -- unaligned output (-A) plus stripping whitespace
# makes the comparison robust to psql padding and trailing newlines.
PROD_COUNT=$(docker compose exec -T postgres psql -U gitpulse -t -A -c "SELECT COUNT(*) FROM teams;" | tr -d '[:space:]')
TEST_COUNT=$(docker compose exec -T postgres psql -U gitpulse -t -A -d backup_test -c "SELECT COUNT(*) FROM teams;" | tr -d '[:space:]')

if [ "${PROD_COUNT}" = "${TEST_COUNT}" ]; then
    echo "[OK] Backup test PASSED"
else
    echo "[FAIL] Backup test FAILED: counts don't match"
    exit 1
fi

# 5. Cleanup happens in the EXIT trap above.

Monthly Restore Test

Bash
# Add to crontab (first Sunday of the month at 4:00).
# NOTE: "0 4 1-7 * 0" does NOT mean "first Sunday" -- when both
# day-of-month and day-of-week are restricted, cron runs the job if
# EITHER matches (days 1-7 of every month, plus every Sunday).
# Restrict the weekday in the command instead; '%' must be escaped
# in a crontab entry.
0 4 1-7 * * [ "$(date +\%u)" = 7 ] && /home/gitpulse/gitpulse/scripts/test-backup.sh

Disaster Recovery

RTO and RPO

Scenario RPO RTO
Database failure 24h 1h
Server failure 24h 4h
Regional outage 24h 8h

DR Playbook

graph TD
    A["Incident"] --> B{Incident type?}
    B -->|"DB corruption"| C["Restore from backup"]
    B -->|"Server down"| D["Provision new server"]
    B -->|"Region outage"| E["Failover to DR site"]

    C --> F["Data verification"]
    D --> G["Deploy from Git"]
    E --> H["DNS failover"]

    F --> I["Testing"]
    G --> I
    H --> I

    I --> J["User notification"]

Recovery Steps

  1. Assess - Determine the scope of the problem
  2. Communicate - Notify stakeholders
  3. Recover - Restore services
  4. Validate - Test functionality
  5. Document - Post-mortem

Backup Monitoring

Alerting

YAML
# monitoring/prometheus/alerts.yml
# NOTE(review): these rules rely on backup_last_success_timestamp and
# backup_last_status, but scripts/backup-metrics.sh in this document
# only exports backup_size_bytes / backup_count / backup_age_seconds --
# confirm which exporter provides the series used here.
groups:
  - name: backups
    rules:
      # Fires when no successful backup completed in the last 2 days.
      - alert: BackupTooOld
        expr: time() - backup_last_success_timestamp > 86400 * 2
        for: 1h
        labels:
          severity: critical
        annotations:
          summary: "Backup is older than 2 days"

      # Fires when the most recent backup run reported failure (status != 1).
      - alert: BackupFailed
        expr: backup_last_status != 1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Last backup failed"

Metrics

Bash
#!/bin/bash
# scripts/backup-metrics.sh
# Exports backup metrics for the Prometheus node_exporter textfile
# collector. Handles the "no backups yet" case (reports zeros instead
# of erroring) and writes the .prom file atomically via rename so the
# collector never scrapes a half-written file.

set -euo pipefail

BACKUP_DIR="${BACKUP_DIR:-/home/gitpulse/backups}"
METRICS_FILE="${METRICS_FILE:-/var/lib/prometheus/backups.prom}"

shopt -s nullglob
dumps=("${BACKUP_DIR}"/*.dump)
shopt -u nullglob

if [ "${#dumps[@]}" -gt 0 ]; then
    # Newest dump by mtime. The original `du -b *.dump | tail -1`
    # picked the alphabetically-last file, which only coincides with
    # the newest while the timestamped naming scheme sorts that way.
    LATEST=$(ls -t "${dumps[@]}" | head -1)
    BACKUP_SIZE=$(du -b "${LATEST}" | cut -f1)
    BACKUP_COUNT=${#dumps[@]}
    BACKUP_AGE=$(( $(date +%s) - $(stat -c %Y "${LATEST}") ))
else
    BACKUP_SIZE=0
    BACKUP_COUNT=0
    BACKUP_AGE=0
fi

TMP_FILE=$(mktemp "${METRICS_FILE}.XXXXXX")
cat << EOF > "${TMP_FILE}"
# HELP backup_size_bytes Size of latest backup
# TYPE backup_size_bytes gauge
backup_size_bytes ${BACKUP_SIZE}

# HELP backup_count Number of backup files
# TYPE backup_count gauge
backup_count ${BACKUP_COUNT}

# HELP backup_age_seconds Age of latest backup
# TYPE backup_age_seconds gauge
backup_age_seconds ${BACKUP_AGE}
EOF
mv "${TMP_FILE}" "${METRICS_FILE}"

Checklist

Weekly

  • Verify automatic backups are running
  • Check backup log
  • Verify free disk space

Monthly

  • Test restore from backup
  • Verify remote backups
  • Review retention policy

Yearly

  • DR test (full restore on new server)
  • Update DR documentation
  • Review RTO/RPO targets

Further Reading