164 lines
5.5 KiB
Bash
164 lines
5.5 KiB
Bash
#!/bin/bash
#
# Per-database logical backup with retry and automatic cleanup.
# Intended to be safe for heavily loaded Galera clusters.
# Author: Marco + Copilot

set -euo pipefail

# Base directory that holds every backup.
BACKUP_BASE=/var/backups/tscale01

# Prefix that distinguishes this node's logical backups.
PREFIX=eqn-bck-logic-

# Timestamp used in the name of this run's backup directory.
TIMESTAMP=$(date +%Y%m%d-%H%M%S)

# Final directory for this backup run.
TARGET="$BACKUP_BASE/${PREFIX}${TIMESTAMP}"

# General log file.
LOGFILE=/var/log/mariadb-backup.log

# Lock file used to prevent concurrent runs.
LOCKFILE=/var/lock/mariadb-backup.lock

# Short host name written into every log line (useful on multi-node setups).
SERVER_NAME=$(hostname -s)

# Required binaries. '|| true' keeps 'set -e' from aborting here; an empty
# value is detected and reported later, once logging is available.
MYSQL_BIN=$(command -v mariadb || true)
MYSQLDUMP_BIN=$(command -v mariadb-dump || true)

# Dump options chosen for Galera: no table locks, consistent snapshot.
# Kept as a single space-separated string; it is word-split at the call site.
MYSQLDUMP_OPTS="--user=root --single-transaction --quick --skip-lock-tables"

# Maximum number of attempts per database (a deadlock triggers a retry).
MAX_RETRIES=10

# Seconds to wait between one attempt and the next.
RETRY_SLEEP=30

# Make sure the log file exists with safe permissions. It is created under a
# restrictive umask (scoped to a subshell so later files are unaffected) so
# that it is never world-readable, not even between creation and the chmod.
(
  umask 077
  touch "$LOGFILE"
)
chown root:root "$LOGFILE"
chmod 600 "$LOGFILE"

# Raise the open-file limit (the dump tool can open many files). This is only
# an optimisation: if the hard limit forbids it, record a warning and carry on
# instead of letting 'set -e' abort the whole backup without a log entry.
if ! ulimit -n 65536 2>/dev/null; then
  echo "[$(date '+%F %T')] [$SERVER_NAME] WARNING: could not raise open-file limit to 65536 (current: $(ulimit -n))" >> "$LOGFILE"
fi

# Main backup body. It runs in a subshell whose file descriptor 9 is opened on
# $LOCKFILE, so the flock taken below is released automatically when the
# subshell exits, whatever the exit path.
#
# Reads:  BACKUP_BASE, PREFIX, TARGET, LOGFILE, LOCKFILE, SERVER_NAME,
#         MYSQL_BIN, MYSQLDUMP_BIN, MYSQLDUMP_OPTS, MAX_RETRIES, RETRY_SLEEP
# Exit:   0 on success or when another run holds the lock; 1 on any failure.
(
  # Append one timestamped, host-tagged line to the log file.
  log() {
    printf '[%s] [%s] %s\n' "$(date '+%F %T')" "$SERVER_NAME" "$*" >> "$LOGFILE"
  }

  # FLOCK: prevents two backups from starting together.
  flock -n 9 || { log "SKIP: another backup is running"; exit 0; }

  TMPLOG=""
  target_created=0
  backup_ok=0

  # Runs on every exit path of this subshell (including 'set -e' aborts):
  # removes the temporary error log and, unless the backup completed,
  # the partially written backup directory.
  cleanup() {
    if [ -n "$TMPLOG" ]; then
      rm -f -- "$TMPLOG"
    fi
    if [ "$target_created" -eq 1 ] && [ "$backup_ok" -ne 1 ]; then
      log "CLEANUP: removing incomplete backup directory $TARGET"
      rm -rf -- "${TARGET:?}"
    fi
  }
  trap cleanup EXIT

  # Check required binaries.
  if [ -z "$MYSQL_BIN" ]; then
    log "ERROR: mariadb client not found in PATH"
    exit 1
  fi

  if [ -z "$MYSQLDUMP_BIN" ]; then
    log "ERROR: mariadb-dump not found in PATH"
    exit 1
  fi

  # Create the backup directory.
  mkdir -p "$TARGET"
  target_created=1
  chown mysql:mysql "$TARGET"
  chmod 750 "$TARGET"

  # Temporary file that captures stderr of the client and dump commands.
  TMPLOG=$(mktemp /tmp/mariadb-backup-logic.XXXXXX)

  echo "---------------------" >> "$LOGFILE"
  log "START logic backup (per-database) $TARGET"

  # List the databases. The client's exit status is checked on its own: if it
  # were masked, an unreachable server would look like "no databases", the run
  # would be reported as OK and old backups would still be rotated away.
  if ! db_raw=$("$MYSQL_BIN" -N -e "SHOW DATABASES" 2>"$TMPLOG"); then
    log "ERROR: unable to list databases"
    tail -n 50 "$TMPLOG" >> "$LOGFILE"
    log "RESULT: FAILED, database list unavailable"
    exit 1
  fi

  # Keep user databases only (system schemas are excluded). grep exits 1 when
  # nothing is left after filtering, which is not an error here.
  DB_LIST=$(grep -vE '^(information_schema|performance_schema|mysql|sys)$' <<<"$db_raw" || true)

  if [ -z "$DB_LIST" ]; then
    log "WARNING: no user databases found to dump"
  fi

  FAILED=0
  FAILED_DBS=""

  # Loop over every database, one name per line. The list is fed through fd 3
  # so the commands inside the loop keep their normal stdin, and names are
  # neither word-split nor glob-expanded.
  while IFS= read -r -u 3 db; do
    [ -n "$db" ] || continue

    DUMPFILE="$TARGET/${db}.sql"
    log "START dump database '$db' -> $DUMPFILE"

    success=0

    # Retry loop: only deadlocks (server error 1213) are retried.
    for ((attempt = 1; attempt <= MAX_RETRIES; attempt++)); do
      : >"$TMPLOG" # start each attempt with an empty error log

      # MYSQLDUMP_OPTS is deliberately unquoted: it is a space-separated
      # option string that must be split into separate arguments.
      # shellcheck disable=SC2086
      if "$MYSQLDUMP_BIN" $MYSQLDUMP_OPTS "$db" >"$DUMPFILE" 2>"$TMPLOG"; then
        log "END dump database '$db' (attempt $attempt)"
        success=1
        break
      fi

      log "ERROR dumping database '$db' (attempt $attempt). See $TMPLOG"
      tail -n 50 "$TMPLOG" >> "$LOGFILE"

      # Accept the forms in which the dump tool can print the error number:
      # "Error 1213: ...", "Got error: 1213: ..." and "... (1213)".
      # NOTE(review): confirm the formats against the installed mariadb-dump.
      if ! grep -qE '[Ee]rror:? 1213|\(1213\)' "$TMPLOG"; then
        # Non-recoverable error: stop retrying.
        log "NON-RETRYABLE error for database '$db'"
        break
      fi

      # Deadlock: retry, unless this was the last allowed attempt (sleeping
      # and announcing a retry that never happens would be misleading).
      if [ "$attempt" -lt "$MAX_RETRIES" ]; then
        log "RETRY for database '$db' in ${RETRY_SLEEP}s due to deadlock (1213)"
        sleep "$RETRY_SLEEP"
      else
        log "GIVING UP on database '$db' after $MAX_RETRIES attempts (deadlock 1213)"
      fi
    done

    # Still unsuccessful after the retries: mark the database as failed.
    if [ "$success" -ne 1 ]; then
      FAILED=1
      FAILED_DBS="$FAILED_DBS $db"
    fi
  done 3<<<"$DB_LIST"

  # Backup size: sum of file sizes (apparent) and disk usage as seen by du.
  size_apparent_bytes=$(find "$TARGET" -type f -printf '%s\n' 2>/dev/null | awk '{s+=$1} END{print s+0}')
  size_apparent_human=$(numfmt --to=iec --suffix=B "$size_apparent_bytes" 2>/dev/null || echo "${size_apparent_bytes}B")

  size_disk_bytes=$(du -s --block-size=1 "$TARGET" 2>/dev/null | cut -f1 || echo 0)
  size_disk_human=$(numfmt --to=iec --suffix=B "$size_disk_bytes" 2>/dev/null || echo "${size_disk_bytes}B")

  log "SIZE apparent: $size_apparent_human ($size_apparent_bytes bytes) for $TARGET"
  log "SIZE on-disk: $size_disk_human ($size_disk_bytes bytes) for $TARGET"

  # Final result. On failure the EXIT trap logs the CLEANUP line and removes
  # the incomplete backup directory and the temporary log.
  if [ "$FAILED" -ne 0 ]; then
    log "RESULT: FAILED, databases with errors:$FAILED_DBS"
    exit 1
  fi

  log "RESULT: OK, all databases dumped successfully"
  backup_ok=1 # from here on the backup directory must be preserved

  # Rotation: delete logical backups matched by 'find -mtime +7'. Paths are
  # passed NUL-delimited so unusual characters cannot split them. Errors from
  # find are ignored on purpose (best effort).
  old_backups=()
  while IFS= read -r -d '' old_dir; do
    old_backups+=("$old_dir")
  done < <(find "$BACKUP_BASE" -maxdepth 1 -type d -name "${PREFIX}*" -mtime +7 -print0 2>/dev/null)

  if [ "${#old_backups[@]}" -gt 0 ]; then
    log "ROTATE: removing old logic backups:"
    printf '%s\n' "${old_backups[@]}" >> "$LOGFILE"
    rm -rf -- "${old_backups[@]}"
  else
    log "ROTATE: no logic backups to remove (<=7 days)"
  fi

  echo " " >> "$LOGFILE"

) 9>"$LOCKFILE"