1
0
mirror of https://github.com/jcwimer/wrestlingApp synced 2026-04-28 18:04:55 +00:00

Converted the MariaDB deployments to StatefulSets and simplified the replica logic by using GTID.

This commit is contained in:
2026-04-27 18:54:46 -04:00
parent a031cfb446
commit c1b01f0dac
3 changed files with 145 additions and 189 deletions

View File

@@ -1,160 +0,0 @@
# Watchdog for the MariaDB replica.
#
# Every 120 seconds the container inspects SHOW SLAVE STATUS on the replica.
# If replication is not configured, a slave thread is not running, or an error
# is reported, it rebuilds the replica: dump DB_NAME from the master, import it
# into the replica, and re-point replication at the binlog coordinates recorded
# inside that dump.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mariadb-replica-watcher
  labels:
    app: wrestlingdev
    component: mariadb-watcher
spec:
  replicas: 1
  selector:
    matchLabels:
      app: wrestlingdev
      component: mariadb-watcher
  template:
    metadata:
      labels:
        app: wrestlingdev
        component: mariadb-watcher
    spec:
      containers:
        - name: replica-watcher
          image: mariadb:10.3
          env:
            # Root password, used for both the master and the replica.
            - name: MARIADB_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: wrestlingdev-secrets
                  key: dbpassword
            # Credentials the replica uses in CHANGE MASTER TO.
            - name: MYSQL_REPLICATION_USER
              valueFrom:
                secretKeyRef:
                  name: wrestlingdev-secrets
                  key: replication_user
            - name: MYSQL_REPLICATION_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: wrestlingdev-secrets
                  key: replication_password
            # Host of the replication master (dump source).
            - name: MASTER_SERVICE_HOST
              valueFrom:
                secretKeyRef:
                  name: wrestlingdev-secrets
                  key: replication_host
            # Host of the replica that is monitored and rebuilt.
            - name: REPLICA_SERVICE_HOST
              value: "wrestlingdev-mariadb"
            # The single database that is dumped and re-imported.
            - name: DB_NAME
              value: "wrestlingdev"
          command:
            - bash
            - -c
            - |
              set -euo pipefail
              LOG=/var/log/replica-watcher.log
              CHECK_INTERVAL=120
              DUMP_TIMEOUT=300
              DUMP_FILE="/tmp/${DB_NAME}_backup.sql"
              echo "replica-watcher starting: $(date -u)" >>"$LOG"

              # Print a message to stdout and append it to the log file.
              log() { echo "$*" | tee -a "$LOG"; }

              # Run the mysql client as root against the replica over TCP.
              replica_sql() {
                mysql --protocol=TCP -h "$REPLICA_SERVICE_HOST" -uroot -p"$MARIADB_ROOT_PASSWORD" "$@"
              }

              # Read SHOW SLAVE STATUS\G text on stdin and print the trimmed value
              # of field $1 (empty when the field is absent).
              # Trimming is done with sed rather than xargs: xargs treats quotes as
              # syntax and fails on messages such as "Can't connect ...", which
              # under 'set -euo pipefail' would terminate the whole watcher.
              get_val() {
                { grep -m1 -E "^[[:space:]]*$1[[:space:]]*:" || true; } \
                  | tr -d '\r' \
                  | sed -E -e "s/^[[:space:]]*$1[[:space:]]*:[[:space:]]*//" -e 's/[[:space:]]+$//'
              }

              # Succeeds when an error-text field is blank or literally "no error".
              no_error_text() { [ -z "$1" ] || [ "${1,,}" = "no error" ]; }

              # Succeeds when an errno field is blank or 0.
              no_errno() { [ -z "$1" ] || [ "$1" = "0" ]; }

              # Rebuild the replica from a fresh dump of the master.
              # Returns non-zero as soon as a required step fails; the caller
              # simply retries on the next check.
              rebootstrap() {
                log "$(date -u) Starting rebootstrap flow"
                log "Dumping ${DB_NAME} from master ${MASTER_SERVICE_HOST}"
                # --master-data=2 makes mysqldump record, as a comment in the dump,
                # the binlog coordinates that belong to the dumped snapshot. Reading
                # them from the dump avoids the gap between a separate
                # SHOW MASTER STATUS and the start of the dump transaction.
                local -a dump_cmd=(mysqldump --protocol=TCP -h "$MASTER_SERVICE_HOST" -uroot
                  -p"$MARIADB_ROOT_PASSWORD" --single-transaction --master-data=2 "$DB_NAME")
                if command -v timeout >/dev/null 2>&1; then
                  dump_cmd=(timeout "$DUMP_TIMEOUT" "${dump_cmd[@]}")
                fi
                if ! "${dump_cmd[@]}" >"$DUMP_FILE" 2>>"$LOG"; then
                  log "Dump FAILED; aborting this cycle"
                  return 1
                fi
                ls -lh "$DUMP_FILE" || true

                local coords master_log_file master_log_pos
                coords=$(grep -m1 "CHANGE MASTER TO MASTER_LOG_FILE=" "$DUMP_FILE" || true)
                master_log_file=$(sed -nE "s/.*MASTER_LOG_FILE='([^']+)'.*/\1/p" <<<"$coords")
                master_log_pos=$(sed -nE "s/.*MASTER_LOG_POS=([0-9]+).*/\1/p" <<<"$coords")
                if [ -z "$master_log_file" ] || [ -z "$master_log_pos" ]; then
                  log "Failed to get master position from $MASTER_SERVICE_HOST"
                  return 1
                fi
                log "Master position: ${master_log_file}:${master_log_pos}"

                # The replica is only touched once a usable dump exists.
                log "Stopping slave on replica host"
                replica_sql -e "STOP SLAVE;" >>"$LOG" 2>&1 || true

                log "Ensuring database '$DB_NAME' exists on replica"
                if ! replica_sql -e "CREATE DATABASE IF NOT EXISTS \`$DB_NAME\`;" >>"$LOG" 2>&1; then
                  log "Could not create database '$DB_NAME' on replica; aborting this cycle"
                  return 1
                fi

                log "Importing dump into replica host"
                if ! replica_sql "$DB_NAME" <"$DUMP_FILE" >>"$LOG" 2>&1; then
                  log "Import FAILED; aborting this cycle (replication will not be reconfigured)"
                  return 1
                fi
                log "Import completed successfully"

                log "Reconfiguring replication to ${MASTER_SERVICE_HOST}:${master_log_file}:${master_log_pos}"
                replica_sql -e "RESET SLAVE ALL;" >>"$LOG" 2>&1 || true
                if ! replica_sql -e "CHANGE MASTER TO MASTER_HOST='${MASTER_SERVICE_HOST}', MASTER_USER='${MYSQL_REPLICATION_USER}', MASTER_PASSWORD='${MYSQL_REPLICATION_PASSWORD}', MASTER_LOG_FILE='${master_log_file}', MASTER_LOG_POS=${master_log_pos}; START SLAVE;" >>"$LOG" 2>&1; then
                  log "CHANGE MASTER / START SLAVE FAILED; see $LOG"
                  return 1
                fi
                log "SHOW SLAVE STATUS after rebootstrap:"
                replica_sql -e "SHOW SLAVE STATUS\\G" >>"$LOG" 2>&1 || true
              }

              # initial wait
              sleep "$CHECK_INTERVAL"
              while true; do
                log "$(date -u) Checking SHOW SLAVE STATUS"
                SLAVE_RAW=$(replica_sql -e "SHOW SLAVE STATUS\\G" 2>>"$LOG" || true)
                NEED=0
                if [ -z "$SLAVE_RAW" ]; then
                  log "SHOW SLAVE STATUS is empty (replication not configured / not running) -> will rebootstrap"
                  NEED=1
                else
                  SLAVE_IO=$(get_val "Slave_IO_Running" <<<"$SLAVE_RAW")
                  SLAVE_SQL=$(get_val "Slave_SQL_Running" <<<"$SLAVE_RAW")
                  LAST_IO_ERRNO=$(get_val "Last_IO_Errno" <<<"$SLAVE_RAW")
                  LAST_SQL_ERRNO=$(get_val "Last_SQL_Errno" <<<"$SLAVE_RAW")
                  LAST_IO_ERR=$(get_val "Last_IO_Error" <<<"$SLAVE_RAW")
                  LAST_SQL_ERR=$(get_val "Last_SQL_Error" <<<"$SLAVE_RAW")
                  log "Slave IO='${SLAVE_IO}' Slave SQL='${SLAVE_SQL}'"
                  log "Last_IO_Errno='${LAST_IO_ERRNO}' Last_SQL_Errno='${LAST_SQL_ERRNO}'"
                  log "Last_IO_Error='${LAST_IO_ERR}' Last_SQL_Error='${LAST_SQL_ERR}'"

                  NOT_RUNNING=0
                  HAS_ERROR=0
                  if [ "$SLAVE_IO" != "Yes" ] || [ "$SLAVE_SQL" != "Yes" ]; then
                    NOT_RUNNING=1
                  fi
                  if ! no_errno "$LAST_IO_ERRNO" || ! no_errno "$LAST_SQL_ERRNO" \
                    || ! no_error_text "$LAST_IO_ERR" || ! no_error_text "$LAST_SQL_ERR"; then
                    HAS_ERROR=1
                  fi

                  if [ "$NOT_RUNNING" -eq 0 ] && [ "$HAS_ERROR" -eq 0 ]; then
                    log "Both slave threads running and no replication errors -> no action"
                  else
                    log "Decision: NOT_RUNNING=$NOT_RUNNING HAS_ERROR=$HAS_ERROR"
                    NEED=1
                  fi
                fi

                if [ "$NEED" -eq 1 ]; then
                  rebootstrap || log "Rebootstrap did not complete; will retry on the next check"
                  # Do not leave a full copy of the database lying around in /tmp.
                  rm -f "$DUMP_FILE"
                fi
                log "Sleeping ${CHECK_INTERVAL}s before next check"
                sleep "$CHECK_INTERVAL"
              done
      restartPolicy: Always

View File

@@ -27,17 +27,19 @@ spec:
storage: 20Gi storage: 20Gi
--- ---
apiVersion: apps/v1 apiVersion: apps/v1
kind: Deployment kind: StatefulSet
metadata: metadata:
name: wrestlingdev-mariadb name: wrestlingdev-mariadb
labels: labels:
app: wrestlingdev app: wrestlingdev
spec: spec:
replicas: 1
serviceName: wrestlingdev-mariadb
selector: selector:
matchLabels: matchLabels:
app: wrestlingdev app: wrestlingdev
strategy: updateStrategy:
type: Recreate type: RollingUpdate
template: template:
metadata: metadata:
labels: labels:
@@ -47,6 +49,43 @@ spec:
prometheus.io/port: "9125" prometheus.io/port: "9125"
prometheus.io/scrape: "true" prometheus.io/scrape: "true"
spec: spec:
initContainers:
  # Seeds a brand-new replica. When the data volume has not been initialized
  # yet, every non-system database is dumped from the master into the shared
  # init-scripts volume, which the main container mounts at
  # /docker-entrypoint-initdb.d. The dump carries the master's GTID position
  # as a comment (--master-data=2 --gtid); the main container's postStart hook
  # reads it from there.
  - name: bootstrap
    image: mariadb:10.3
    env:
      - name: MARIADB_ROOT_PASSWORD
        valueFrom:
          secretKeyRef:
            name: wrestlingdev-secrets
            key: dbpassword
      - name: MASTER_HOST
        valueFrom:
          secretKeyRef:
            name: wrestlingdev-secrets
            key: replication_host
    command:
      - bash
      - -c
      - |
        # Fail the init container (so it is retried) instead of handing an
        # empty or truncated dump to the main container.
        set -euo pipefail
        if [ -d /var/lib/mysql/mysql ]; then
          echo "Data directory already initialized, skipping bootstrap"
          exit 0
        fi
        echo "Fresh data directory — bootstrapping replica from ${MASTER_HOST}"
        # grep exits 1 when it filters out every line; that is "no user
        # databases", not an error, so only a mysql failure aborts here.
        DBS=$(mysql --protocol=TCP -h "$MASTER_HOST" -uroot -p"$MARIADB_ROOT_PASSWORD" \
          -e "SHOW DATABASES;" --skip-column-names \
          | { grep -Ev '^(information_schema|performance_schema|mysql|sys)$' || true; } \
          | tr '\n' ' ')
        read -r -a DB_LIST <<<"$DBS"
        if [ "${#DB_LIST[@]}" -eq 0 ]; then
          echo "No user databases on ${MASTER_HOST}; nothing to dump"
          exit 0
        fi
        echo "Dumping databases: ${DBS}"
        mysqldump --protocol=TCP -h "$MASTER_HOST" -uroot -p"$MARIADB_ROOT_PASSWORD" \
          --single-transaction --master-data=2 --gtid --databases "${DB_LIST[@]}" \
          > /docker-entrypoint-initdb.d/dump.sql
        echo "Bootstrap dump complete"
    volumeMounts:
      - name: wrestlingdev-mariadb-persistent-storage
        mountPath: /var/lib/mysql
      - name: init-scripts
        mountPath: /docker-entrypoint-initdb.d
containers: containers:
- image: mariadb:10.3 - image: mariadb:10.3
name: mariadb name: mariadb
@@ -56,6 +95,48 @@ spec:
secretKeyRef: secretKeyRef:
name: wrestlingdev-secrets name: wrestlingdev-secrets
key: dbpassword key: dbpassword
# Host of the replication master; used as MASTER_HOST in the postStart hook's CHANGE MASTER TO.
- name: MASTER_HOST
valueFrom:
secretKeyRef:
name: wrestlingdev-secrets
key: replication_host
# Account the replica uses to connect to the master (MASTER_USER in CHANGE MASTER TO).
- name: MYSQL_REPLICATION_USER
valueFrom:
secretKeyRef:
name: wrestlingdev-secrets
key: replication_user
# Password for the replication account (MASTER_PASSWORD in CHANGE MASTER TO).
- name: MYSQL_REPLICATION_PASSWORD
valueFrom:
secretKeyRef:
name: wrestlingdev-secrets
key: replication_password
# After the container starts, wait for MariaDB and (re)configure GTID-based
# replication from MASTER_HOST unless both slave threads are already running.
lifecycle:
  postStart:
    exec:
      command:
        - bash
        - -c
        - |
          DUMP=/docker-entrypoint-initdb.d/dump.sql
          sql() { mysql -uroot -p"$MARIADB_ROOT_PASSWORD" "$@"; }

          # Wait up to 60 x 2s for the server to answer a ping over TCP.
          # NOTE(review): TCP is presumably used so that a socket-only
          # initialization server is not mistaken for the real one — confirm.
          READY=0
          for i in $(seq 1 60); do
            if mysqladmin ping -uroot -p"$MARIADB_ROOT_PASSWORD" --protocol=TCP -h 127.0.0.1 --silent; then
              READY=1
              break
            fi
            sleep 2
          done
          if [ "$READY" -ne 1 ]; then
            # Never reconfigure replication against a server that did not come up.
            echo "MariaDB did not answer on 127.0.0.1 in time; not configuring replication" >&2
            exit 1
          fi

          SLAVE_STATUS=$(sql -e "SHOW SLAVE STATUS\G" 2>/dev/null)
          SLAVE_IO=$(echo "$SLAVE_STATUS" | grep -m1 "Slave_IO_Running:" | awk '{print $2}')
          SLAVE_SQL=$(echo "$SLAVE_STATUS" | grep -m1 "Slave_SQL_Running:" | awk '{print $2}')
          if [ "${SLAVE_IO}" = "Yes" ] && [ "${SLAVE_SQL}" = "Yes" ]; then
            echo "Replication is already running"
            exit 0
          fi

          sql -e "STOP SLAVE; RESET SLAVE ALL;"

          # Seed gtid_slave_pos from the bootstrap dump only while the replica
          # has no position of its own. The dump stays in the init-scripts
          # volume for the life of the pod, so seeding unconditionally would
          # rewind an already-replicating server to the dump's position
          # whenever this hook runs again (e.g. the IO thread is still
          # "Connecting" right after a container restart).
          if ! CURRENT_GTID_POS=$(sql -N -s -e "SELECT @@GLOBAL.gtid_slave_pos;"); then
            echo "Could not read gtid_slave_pos; not configuring replication" >&2
            exit 1
          fi
          if [ -n "$CURRENT_GTID_POS" ]; then
            echo "Keeping existing gtid_slave_pos: '${CURRENT_GTID_POS}'"
          elif [ -f "$DUMP" ]; then
            GTID_POS=$(grep -m1 "SET GLOBAL gtid_slave_pos" "$DUMP" | sed "s/.*gtid_slave_pos='\([^']*\)'.*/\1/")
            if [ -n "$GTID_POS" ]; then
              echo "Setting gtid_slave_pos from dump: '${GTID_POS}'"
              sql -e "SET GLOBAL gtid_slave_pos='${GTID_POS}';"
            else
              echo "No gtid_slave_pos found in ${DUMP}; leaving it empty"
            fi
          fi

          sql \
            -e "CHANGE MASTER TO MASTER_HOST='${MASTER_HOST}', MASTER_USER='${MYSQL_REPLICATION_USER}', MASTER_PASSWORD='${MYSQL_REPLICATION_PASSWORD}', MASTER_USE_GTID=slave_pos;" \
            -e "START SLAVE;"
ports: ports:
- containerPort: 3306 - containerPort: 3306
name: mariadb name: mariadb
@@ -64,6 +145,8 @@ spec:
mountPath: /var/lib/mysql mountPath: /var/lib/mysql
- name: mysettings-config-volume - name: mysettings-config-volume
mountPath: /etc/mysql/mariadb.conf.d mountPath: /etc/mysql/mariadb.conf.d
- name: init-scripts
mountPath: /docker-entrypoint-initdb.d
# resources: # resources:
# limits: # limits:
# memory: "512Mi" # memory: "512Mi"
@@ -180,6 +263,8 @@ spec:
- name: mysettings-config-volume - name: mysettings-config-volume
configMap: configMap:
name: mariadb-mysettings name: mariadb-mysettings
- name: init-scripts
emptyDir: {}
--- ---
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
@@ -191,29 +276,44 @@ metadata:
data: data:
70-mysettings.cnf: | 70-mysettings.cnf: |
[mariadb] [mariadb]
# Slow log # Slow query log — records queries taking longer than long_query_time seconds
slow_query_log=1 slow_query_log=1
#slow_query_log_file=/var/log/mariadb/slow.log #slow_query_log_file=/var/log/mariadb/slow.log
slow_query_log_file=/var/lib/mysql/slow.log slow_query_log_file=/var/lib/mysql/slow.log
long_query_time=0.2 long_query_time=0.2
# mysqltuner recommendations # mysqltuner recommendations
# Max size for in-memory temp tables before spilling to disk
tmp_table_size=32M tmp_table_size=32M
max_heap_table_size=32M max_heap_table_size=32M
# Collect detailed query/table statistics (required by some monitoring tools)
performance_schema=ON performance_schema=ON
# Size of each InnoDB redo log file; increase for write-heavy workloads
innodb_log_file_size=32M innodb_log_file_size=32M
# Number of open table handles to cache; reduces overhead of reopening tables
table_open_cache=4000 table_open_cache=4000
# replica settings
server_id=2 # Default server_id, can be overridden for master/slave
log_bin=mysql-bin # Enable binary logging
binlog_format=ROW # Recommended for replication
log_slave_updates=ON # Ensure slaves log updates (useful for multi-source replication)
sync_binlog=1 # Flush binary logs after each transaction for safety
read_only=0 # Default, will be managed by the init script
expire_logs_days=7 # Retain binary logs for 7 days
# if you want to ignore dbs to replicate # Replication (replica)
# replicate-ignore-db=wrestlingtourney-queue # Must be unique and different from the master's server_id
# if you only want to replicate certain dbs server_id=2
# Enable binary logging on the replica (required for log_slave_updates)
log_bin=mysql-bin
# ROW format is safest: records exact row changes rather than SQL statements
binlog_format=ROW
# Write replicated events into this replica's own binlog (needed for chained replicas)
log_slave_updates=ON
# Enforce GTID consistency — rejects transactions that would break GTID sequences
gtid_strict_mode=ON
# Flush binlog to disk on every commit; prevents binlog loss on crash
sync_binlog=1
# Prevent accidental writes directly to the replica
read_only=1
# How many days to retain binary logs before automatic purge
expire_logs_days=7
# Only replicate the application database — rails-specific: excludes the solid_queue DB so
# background job workers can run independently on the replica cluster
replicate-do-db=wrestlingdev replicate-do-db=wrestlingdev
# replicate-ignore-db=wrestlingtourney-queue
# /etc/mysql/mariadb.conf.d/70-mysettings.cnf # /etc/mysql/mariadb.conf.d/70-mysettings.cnf

View File

@@ -27,17 +27,19 @@ spec:
storage: 20Gi storage: 20Gi
--- ---
apiVersion: apps/v1 apiVersion: apps/v1
kind: Deployment kind: StatefulSet
metadata: metadata:
name: wrestlingdev-mariadb name: wrestlingdev-mariadb
labels: labels:
app: wrestlingdev app: wrestlingdev
spec: spec:
replicas: 1
serviceName: wrestlingdev-mariadb
selector: selector:
matchLabels: matchLabels:
app: wrestlingdev app: wrestlingdev
strategy: updateStrategy:
type: Recreate type: RollingUpdate
template: template:
metadata: metadata:
labels: labels:
@@ -227,25 +229,39 @@ metadata:
data: data:
70-mysettings.cnf: | 70-mysettings.cnf: |
[mariadb] [mariadb]
# Slow log # Slow query log — records queries taking longer than long_query_time seconds
slow_query_log=1 slow_query_log=1
#slow_query_log_file=/var/log/mariadb/slow.log #slow_query_log_file=/var/log/mariadb/slow.log
slow_query_log_file=/var/lib/mysql/slow.log slow_query_log_file=/var/lib/mysql/slow.log
long_query_time=0.2 long_query_time=0.2
# mysqltuner recommendations # mysqltuner recommendations
# Max size for in-memory temp tables before spilling to disk
tmp_table_size=32M tmp_table_size=32M
max_heap_table_size=32M max_heap_table_size=32M
# Collect detailed query/table statistics (required by some monitoring tools)
performance_schema=ON performance_schema=ON
# Size of each InnoDB redo log file; increase for write-heavy workloads
innodb_log_file_size=32M innodb_log_file_size=32M
# Number of open table handles to cache; reduces overhead of reopening tables
table_open_cache=4000 table_open_cache=4000
# How many days to retain binary logs before automatic purge (does not affect the error or slow query logs)
expire_logs_days=7 expire_logs_days=7
# master slave # Replication (master)
server_id=1 # Unique server ID for the master # Unique ID for this server across the whole replication topology
log_bin=mysql-bin # Enable binary logging server_id=1
binlog_format=ROW # Recommended format for replication (ROW, STATEMENT, or MIXED) # Enable binary logging — required for replication
log_slave_updates=ON # Ensure any changes replicated to the master are also logged to the binary log (useful for multi-source replication) log_bin=mysql-bin
sync_binlog=1 # Ensures binary logs are synchronized with disk after each transaction for data safety # ROW format is safest: records exact row changes rather than SQL statements
expire_logs_days=7 # Optional: Number of days to retain binary logs (helps with cleanup) binlog_format=ROW
# Include replicated events in this server's own binlog (needed for chained replicas)
# /etc/mysql/mariadb.conf.d/70-mysettings.cnf log_slave_updates=ON
# Enforce GTID consistency — rejects transactions that would break GTID sequences
gtid_strict_mode=ON
# Flush binlog to disk on every commit; prevents binlog loss on crash
sync_binlog=1
# How many days to retain binary logs before automatic purge
expire_logs_days=7
# /etc/mysql/mariadb.conf.d/70-mysettings.cnf is included by the main config and will override any conflicting settings in the default config files. This allows us to customize settings without modifying the base image.