Files
fedora-infra_ansible/roles/openshift-apps/forgejo/templates/valkey-configmap.yaml.j2
David Kirwan dd1d73e8e7 forgejo: debugging valkey configuration
Signed-off-by: David Kirwan <davidkirwanirl@gmail.com>
2025-11-11 10:32:09 +00:00

130 lines
4.6 KiB
Django/Jinja

---
# ConfigMap "valkey" in the "forgejo" namespace.
# Its data section carries three entries:
#   valkey.conf      - base Valkey server configuration
#   init-config.sh   - renders the per-pod configuration from the base file
#   init-cluster.sh  - creates the Valkey cluster from the pod with ordinal 0
apiVersion: v1
kind: ConfigMap
metadata:
name: valkey
namespace: forgejo
data:
# Base Valkey server configuration shared by every node.
# init-config.sh reads /etc/valkey/valkey.conf (presumably this entry, mounted
# there - confirm against the StatefulSet) and appends requirepass, masterauth
# and cluster-announce-ip, so none of those are set here.
#   protected-mode no       - turn protected mode off
#   cluster-enabled yes     - run the server in cluster mode
#   cluster-config-file     - file in which the node keeps its cluster state
#   cluster-node-timeout    - node timeout (milliseconds, per the Valkey docs)
#   appendonly yes          - enable append-only-file persistence
#   port 6379               - client port; init-cluster.sh uses the same port
valkey.conf: |
protected-mode no
cluster-enabled yes
cluster-config-file nodes.conf
cluster-node-timeout 5000
appendonly yes
port 6379
init-config.sh: |
  #!/bin/sh
  #
  # Render the runtime Valkey configuration for this pod.
  #
  # Copies the base configuration from /etc/valkey/valkey.conf, appends the
  # per-pod settings (requirepass, masterauth, cluster-announce-ip) and
  # installs the result as /config/valkey.conf.
  #
  # Required environment:
  #   VALKEY_PASSWORD  written as both requirepass and masterauth
  #   POD_IP           written as cluster-announce-ip
  #
  # Exit status: 0 on success; non-zero if a required variable is missing
  # or any copy/write step fails.
  set -eu

  # Fail early instead of emitting directives with an empty argument.
  : "${VALKEY_PASSWORD:?VALKEY_PASSWORD must be set}"
  : "${POD_IP:?POD_IP must be set}"

  cp /etc/valkey/valkey.conf /tmp/valkey.conf

  # printf '%s' writes the values verbatim; echo under /bin/sh may interpret
  # backslash sequences that happen to occur in the password.
  {
    printf 'requirepass %s\n' "$VALKEY_PASSWORD"
    printf 'masterauth %s\n' "$VALKEY_PASSWORD"
    printf 'cluster-announce-ip %s\n' "$POD_IP"
  } >> /tmp/valkey.conf

  cp /tmp/valkey.conf /config/valkey.conf

  # Data-directory preparation, currently disabled:
  #mkdir -p /data
  #chown -R 1000:1000 /data
init-cluster.sh: |
  #!/bin/sh
  #
  # Bootstrap the Valkey cluster.
  #
  # Every pod runs this script and waits for its own Valkey server to answer
  # PING. Only the pod whose hostname ends in "-0" goes further: it waits for
  # all TOTAL_NODES peers, returns early if the cluster is already healthy,
  # otherwise runs `valkey-cli --cluster create` (with retries) and verifies
  # that the cluster reaches cluster_state:ok.
  #
  # Pods with a non-zero ordinal never join explicitly: `--cluster create`
  # enrols every listed node and assigns replicas via --cluster-replicas.
  #
  # Required environment:
  #   VALKEY_PASSWORD  password handed to valkey-cli via -a
  #
  # Exit status: 0 when there is nothing to do or the cluster is healthy,
  # 1 when the ordinal cannot be determined, cluster creation fails on every
  # attempt, or the cluster does not become healthy in time.
  set -eu

  : "${VALKEY_PASSWORD:?VALKEY_PASSWORD must be set}"

  TOTAL_NODES=6
  PRIMARIES=$(( (TOTAL_NODES + 1) / 2 ))
  REPLICAS_PER_PRIMARY=$(( (TOTAL_NODES - PRIMARIES) / PRIMARIES ))
  PORT=6379
  DOMAIN="valkey.forgejo.svc.cluster.local"
  MAX_CREATE_ATTEMPTS=5
  MAX_HEALTH_CHECKS=6
  NAME=$(hostname)

  # Print a message prefixed with this pod's hostname.
  log() {
    printf '[%s] %s\n' "$NAME" "$*"
  }

  # Block until the server at host $1 answers PING.
  # $2 is the pause between probes in seconds, $3 the message printed
  # before each pause.
  wait_for_ping() {
    until valkey-cli -h "$1" -p "$PORT" -a "$VALKEY_PASSWORD" ping >/dev/null 2>&1; do
      printf '%s\n' "$3"
      sleep "$2"
    done
  }

  # Succeed when the local node reports cluster_state:ok.
  cluster_state_ok() {
    valkey-cli -a "$VALKEY_PASSWORD" cluster info 2>/dev/null \
      | grep -q "cluster_state:ok"
  }

  # === Determine this pod's ordinal =====================================
  # The ordinal is whatever follows the last "-" in the hostname. It must be
  # numeric: a non-numeric value would make the `-ne 0` test below error out,
  # which an `if` treats as false, and the pod would wrongly try to create
  # the cluster.
  ORDINAL=${NAME##*-}
  case "$ORDINAL" in
    '' | *[!0-9]*)
      log "ERROR: cannot derive a numeric ordinal from hostname"
      exit 1
      ;;
  esac

  # === Wait for local Valkey to be ready ================================
  wait_for_ping localhost 2 "[$NAME] Waiting for local Valkey to start..."
  log "Local Valkey is ready"

  # === Only primary-0 creates the cluster ===============================
  if [ "$ORDINAL" -ne 0 ]; then
    log "Not primary-0 (ordinal $ORDINAL). Skipping cluster creation."
    exit 0
  fi

  # === Wait for ALL nodes to be reachable ================================
  # The node list is collected in the positional parameters so it can be
  # passed to valkey-cli as separate, safely quoted arguments.
  log "Waiting for all $TOTAL_NODES nodes to be reachable..."
  set --
  i=0
  while [ "$i" -lt "$TOTAL_NODES" ]; do
    HOST="valkey-${i}.${DOMAIN}"
    wait_for_ping "$HOST" 3 " → Waiting for $HOST..."
    echo " ✓ $HOST is reachable"
    set -- "$@" "${HOST}:${PORT}"
    i=$((i + 1))
  done

  # === Check if cluster is already healthy ===============================
  # A failing query simply means "not healthy yet"; it must not abort the
  # script under set -e.
  INFO=$(valkey-cli -a "$VALKEY_PASSWORD" cluster info 2>/dev/null) || INFO=""
  if printf '%s\n' "$INFO" | grep -q "cluster_known_nodes:$TOTAL_NODES" \
    && printf '%s\n' "$INFO" | grep -q "cluster_state:ok"; then
    log "Cluster already healthy with $TOTAL_NODES nodes"
    exit 0
  fi

  log "Creating cluster:"
  echo " Primaries: $PRIMARIES"
  echo " Replicas per primary: $REPLICAS_PER_PRIMARY"
  echo " Nodes: $*"

  # === Retry cluster creation ============================================
  attempt=1
  while :; do
    log "Attempt $attempt to create cluster..."
    if valkey-cli -a "$VALKEY_PASSWORD" \
      --cluster create "$@" \
      --cluster-replicas "$REPLICAS_PER_PRIMARY" \
      --cluster-yes; then
      log "Cluster created successfully"
      break
    fi
    # Give up right away after the last attempt instead of sleeping first.
    if [ "$attempt" -ge "$MAX_CREATE_ATTEMPTS" ]; then
      log "All attempts failed"
      exit 1
    fi
    log "Failed. Retrying in 10s..."
    sleep 10
    attempt=$((attempt + 1))
  done

  # === Final health check ================================================
  # The cluster may need a moment to converge after creation, so poll a
  # bounded number of times rather than checking once.
  check=1
  while :; do
    sleep 5
    if cluster_state_ok; then
      log "CLUSTER IS HEALTHY — ALL 16384 SLOTS COVERED"
      break
    fi
    if [ "$check" -ge "$MAX_HEALTH_CHECKS" ]; then
      log "ERROR: Cluster not OK"
      # Diagnostic output only; its own failure must not mask exit code 1.
      valkey-cli -a "$VALKEY_PASSWORD" cluster nodes || true
      exit 1
    fi
    check=$((check + 1))
  done