# Mirror of https://github.com/jcwimer/wrestlingApp
# Synced 2026-03-24 17:04:43 +00:00 (161 lines, 7.8 KiB, YAML)
---
# Deployment running a single "replica-watcher" pod. The pod polls
# SHOW SLAVE STATUS on the replica every CHECK_INTERVAL seconds and, when
# replication is missing, stopped, or reporting an error, rebuilds the replica:
# dump DB_NAME from the master, import it into the replica, then re-point
# replication at the master's binlog coordinates.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mariadb-replica-watcher
  labels:
    app: wrestlingdev
    component: mariadb-watcher
spec:
  replicas: 1
  selector:
    matchLabels:
      app: wrestlingdev
      component: mariadb-watcher
  template:
    metadata:
      labels:
        app: wrestlingdev
        component: mariadb-watcher
    spec:
      containers:
        - name: replica-watcher
          image: mariadb:10.3
          env:
            # Root password used for both the master and the replica.
            - name: MARIADB_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: wrestlingdev-secrets
                  key: dbpassword
            - name: MYSQL_REPLICATION_USER
              valueFrom:
                secretKeyRef:
                  name: wrestlingdev-secrets
                  key: replication_user
            - name: MYSQL_REPLICATION_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: wrestlingdev-secrets
                  key: replication_password
            # Host of the replication source (dump + binlog coordinates).
            - name: MASTER_SERVICE_HOST
              valueFrom:
                secretKeyRef:
                  name: wrestlingdev-secrets
                  key: replication_host
            # Host of the replica that is monitored and rebuilt.
            - name: REPLICA_SERVICE_HOST
              value: "wrestlingdev-mariadb"
            - name: DB_NAME
              value: "wrestlingdev"
          command:
            - bash
            - -c
            - |
              set -euo pipefail

              LOG=/var/log/replica-watcher.log
              CHECK_INTERVAL=120   # seconds between health checks
              DUMP_TIMEOUT=300     # seconds allowed for mysqldump (when `timeout` exists)

              # Print a message to stdout and append it to $LOG.
              log() { echo "$*" | tee -a "$LOG"; }

              # Strip leading/trailing whitespace from stdin.
              trim() { sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'; }

              # Read `SHOW SLAVE STATUS\G` output on stdin and print the value of
              # field $1 (empty output if the field is absent).
              # `trim` is used instead of `xargs`: xargs interprets quotes and exits
              # non-zero on an unmatched one, which under pipefail + `set -e` would
              # abort the script on error messages containing an apostrophe.
              get_val() {
                { grep -m1 -E "^[[:space:]]*$1[[:space:]]*:" || true; } \
                  | sed -E "s/^[[:space:]]*$1[[:space:]]*:[[:space:]]*(.*)$/\1/" \
                  | tr -d '\r' \
                  | trim
              }

              # Run the mysql client as root against host $1; remaining args are
              # passed through to mysql.
              mysql_on() {
                local host=$1
                shift
                mysql --protocol=TCP -h "$host" -uroot -p"$MARIADB_ROOT_PASSWORD" "$@"
              }

              # Dump $DB_NAME from the master into file $1. mysqldump's stderr is
              # appended to $LOG. Wrapped in `timeout` when that tool is available.
              dump_master() {
                local -a cmd=(mysqldump --protocol=TCP -h "$MASTER_SERVICE_HOST" -uroot
                              -p"$MARIADB_ROOT_PASSWORD" --single-transaction "$DB_NAME")
                if command -v timeout >/dev/null 2>&1; then
                  cmd=(timeout "$DUMP_TIMEOUT" "${cmd[@]}")
                fi
                "${cmd[@]}" >"$1" 2>>"$LOG"
              }

              # Inspect the replica. Returns 0 when replication must be rebuilt,
              # 1 when it is healthy or when the replica cannot be queried at all
              # (a rebuild cannot succeed against an unreachable replica, so the
              # master is not dumped for nothing).
              needs_rebootstrap() {
                local raw io sql io_errno sql_errno io_err sql_err v
                local not_running=0 has_error=0

                log "$(date -u) Checking SHOW SLAVE STATUS"
                if ! raw=$(mysql_on "$REPLICA_SERVICE_HOST" -e "SHOW SLAVE STATUS\\G" 2>>"$LOG"); then
                  log "Could not query replica $REPLICA_SERVICE_HOST; skipping this cycle"
                  return 1
                fi
                if [ -z "$raw" ]; then
                  log "SHOW SLAVE STATUS is empty (replication not configured / not running) -> will rebootstrap"
                  return 0
                fi

                io=$(get_val "Slave_IO_Running" <<<"$raw")
                sql=$(get_val "Slave_SQL_Running" <<<"$raw")
                io_errno=$(get_val "Last_IO_Errno" <<<"$raw")
                sql_errno=$(get_val "Last_SQL_Errno" <<<"$raw")
                io_err=$(get_val "Last_IO_Error" <<<"$raw")
                sql_err=$(get_val "Last_SQL_Error" <<<"$raw")

                log "Slave IO='${io}' Slave SQL='${sql}'"
                log "Last_IO_Errno='${io_errno}' Last_SQL_Errno='${sql_errno}'"
                log "Last_IO_Error='${io_err}' Last_SQL_Error='${sql_err}'"

                # Anything other than "Yes" counts as not running.
                if [ "$io" != "Yes" ] || [ "$sql" != "Yes" ]; then
                  not_running=1
                fi
                # A non-empty, non-zero errno is an error.
                for v in "$io_errno" "$sql_errno"; do
                  if [ -n "$v" ] && [ "$v" != "0" ]; then has_error=1; fi
                done
                # A non-empty error text other than "no error" (any case) is an error.
                for v in "${io_err,,}" "${sql_err,,}"; do
                  if [ -n "$v" ] && [ "$v" != "no error" ]; then has_error=1; fi
                done

                log "Decision: NOT_RUNNING=$not_running HAS_ERROR=$has_error"
                if [ "$not_running" -eq 1 ] || [ "$has_error" -eq 1 ]; then
                  return 0
                fi
                log "Both slave threads running and no replication errors -> no action"
                return 1
              }

              # Rebuild the replica from a fresh master dump and restart
              # replication. Returns non-zero (after logging why) if any required
              # step fails; the caller retries on the next cycle.
              rebootstrap() {
                local master_status master_log_file master_log_pos change_sql
                local dump_file="/tmp/${DB_NAME}_backup.sql"

                log "$(date -u) Starting rebootstrap flow"

                # NOTE(review): the binlog position is read before the dump starts,
                # so writes that land in between may be both in the dump and replayed
                # from the binlog. Consider `mysqldump --master-data` to capture the
                # coordinates atomically with the snapshot -- confirm before changing.
                master_status=$(mysql_on "$MASTER_SERVICE_HOST" -sse "SHOW MASTER STATUS;" 2>>"$LOG" || true)
                master_log_file=$(awk 'NR==1 {print $1}' <<<"$master_status")
                master_log_pos=$(awk 'NR==1 {print $2}' <<<"$master_status")
                # The position is interpolated unquoted into SQL below, so it must
                # be purely numeric.
                if [ -z "$master_log_file" ] || ! [[ "$master_log_pos" =~ ^[0-9]+$ ]]; then
                  log "Failed to get master position from $MASTER_SERVICE_HOST"
                  return 1
                fi
                log "Master position: ${master_log_file}:${master_log_pos}"

                log "Stopping slave on replica host"
                # Best effort: the slave may already be stopped or unconfigured.
                mysql_on "$REPLICA_SERVICE_HOST" -e "STOP SLAVE;" >>"$LOG" 2>&1 || true

                log "Dumping ${DB_NAME} from master ${MASTER_SERVICE_HOST}"
                if ! dump_master "$dump_file"; then
                  log "Dump FAILED; aborting this cycle"
                  rm -f "$dump_file"
                  return 1
                fi
                ls -lh "$dump_file"

                log "Ensuring database '$DB_NAME' exists on replica"
                if ! mysql_on "$REPLICA_SERVICE_HOST" \
                    -e "CREATE DATABASE IF NOT EXISTS \`$DB_NAME\`;" >>"$LOG" 2>&1; then
                  log "Could not create database '$DB_NAME' on replica; aborting this cycle"
                  rm -f "$dump_file"
                  return 1
                fi

                log "Importing dump into replica host"
                if ! mysql_on "$REPLICA_SERVICE_HOST" "$DB_NAME" <"$dump_file" >>"$LOG" 2>&1; then
                  log "Import FAILED; aborting this cycle (replication will not be reconfigured)"
                  rm -f "$dump_file"
                  return 1
                fi
                rm -f "$dump_file"
                log "Import completed successfully"

                log "Reconfiguring replication to ${MASTER_SERVICE_HOST}:${master_log_file}:${master_log_pos}"
                # Best effort: fails harmlessly when no slave configuration exists.
                mysql_on "$REPLICA_SERVICE_HOST" -e "RESET SLAVE ALL;" >>"$LOG" 2>&1 || true

                change_sql="CHANGE MASTER TO MASTER_HOST='${MASTER_SERVICE_HOST}',"
                change_sql+=" MASTER_USER='${MYSQL_REPLICATION_USER}',"
                change_sql+=" MASTER_PASSWORD='${MYSQL_REPLICATION_PASSWORD}',"
                change_sql+=" MASTER_LOG_FILE='${master_log_file}',"
                change_sql+=" MASTER_LOG_POS=${master_log_pos}; START SLAVE;"
                if ! mysql_on "$REPLICA_SERVICE_HOST" -e "$change_sql" >>"$LOG" 2>&1; then
                  log "CHANGE MASTER / START SLAVE FAILED; see $LOG"
                  return 1
                fi

                log "SHOW SLAVE STATUS after rebootstrap:"
                mysql_on "$REPLICA_SERVICE_HOST" -e "SHOW SLAVE STATUS\\G" >>"$LOG" 2>&1 || true
              }

              log "replica-watcher starting: $(date -u)"

              # initial wait
              sleep "$CHECK_INTERVAL"

              while true; do
                if needs_rebootstrap; then
                  rebootstrap || log "Rebootstrap did not complete; will retry next cycle"
                fi

                log "Sleeping ${CHECK_INTERVAL}s before next check"
                sleep "$CHECK_INTERVAL"
              done
      restartPolicy: Always