fix: replace curl with Docker health status and wget for health check

The health check used curl, which is not installed on the prod server.
Replace with a dual approach:
1. Primary: check Docker's own container health status (the healthcheck
   defined in docker-compose.prod.yml already runs wget inside the container)
2. Secondary: wget from the host as a fallback signal

Also add diagnostic logging (container status + recent backend logs)
before triggering rollback on health check failure.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-09 09:22:28 -04:00
parent 2aad137bd7
commit 3e7463cf46

View File

@@ -359,34 +359,42 @@ fi
# ====================================================================
echo ""
log "--- Step 5/6: Verifying application health ---"
log "Waiting ${HEALTH_START_WAIT}s for backend to initialize (matches Docker start_period) ..."
sleep "$HEALTH_START_WAIT"

# Poll until the backend is confirmed up or HEALTH_RETRIES attempts are used.
# Either of two signals marks the backend healthy:
#   1. Primary: Docker's own container health status
#      (docker-compose.prod.yml already defines a healthcheck using wget
#      inside the container).
#   2. Secondary: a direct HTTP request from the host to HEALTH_URL.
# NOTE(review): the host-side check can succeed while Docker still reports a
# non-healthy status; that is accepted here as "healthy" — confirm intended.
HEALTHY=false
for ((i=1; i<=HEALTH_RETRIES; i++)); do
  # COMPOSE_CMD is left unquoted on purpose so it can expand to a command plus
  # arguments (presumably something like "docker compose -f ..." — verify
  # against where it is defined).
  CONTAINER_HEALTH=$($COMPOSE_CMD ps backend --format '{{.Health}}' 2>/dev/null || echo "unknown")
  # The command can exit 0 and still print nothing (e.g. no matching
  # container); normalize that to "unknown" so the retry log stays readable.
  CONTAINER_HEALTH=${CONTAINER_HEALTH:-unknown}
  if [ "$CONTAINER_HEALTH" = "healthy" ]; then
    HEALTHY=true
    break
  fi

  # Also try a direct HTTP check from the host as a secondary signal.
  # Use wget (available on Ubuntu) since curl may not be installed.
  # --tries=1 keeps a single probe bounded by --timeout: wget otherwise
  # retries timed-out requests (20 tries by default), which would stall one
  # loop iteration far longer than HEALTH_INTERVAL suggests.
  if wget -qO- --tries=1 --timeout=5 "$HEALTH_URL" >/dev/null 2>&1; then
    HEALTHY=true
    break
  fi

  log " Health check attempt $i/$HEALTH_RETRIES — container status: ${CONTAINER_HEALTH}, retrying in ${HEALTH_INTERVAL}s ..."
  sleep "$HEALTH_INTERVAL"
done

if [ "$HEALTHY" = true ]; then
  ok "Backend is healthy and responding"
else
  # Log diagnostics (container status + recent backend logs) before giving
  # up, so the failure can be investigated after the rollback.
  err "Backend failed to respond after $((HEALTH_START_WAIT + HEALTH_RETRIES * HEALTH_INTERVAL))s"
  warn "Container status: $($COMPOSE_CMD ps backend 2>/dev/null || echo 'unknown')"
  warn "Recent backend logs:"
  # Best-effort: a failure to fetch logs must not mask the real error.
  $COMPOSE_CMD logs --tail=20 backend 2>/dev/null || true
  err "Triggering automatic rollback ..."
  exit 1 # trap will handle rollback (trap is defined outside this section)
fi
# Informational follow-up: report whether Docker itself marks the backend
# container as healthy. This never fails the deploy; a negative result only
# produces a warning.
#
# The match is on the whole word "healthy": a plain substring match would also
# hit "unhealthy" and report a failing container as healthy.
# The output is captured first rather than piped into grep, so a failing
# compose command cannot abort the script or skew the result when
# errexit/pipefail are active (NOTE(review): shell options are set outside
# this section — confirm).
BACKEND_PS_OUTPUT=$($COMPOSE_CMD ps backend 2>/dev/null || true)
if grep -qw "healthy" <<<"$BACKEND_PS_OUTPUT"; then
  ok "Backend container health check: healthy"
else
  warn "Backend container health status is not 'healthy' yet (may still be within start_period)"
fi
# ====================================================================
# STEP 7: Post-upgrade database backup
# ====================================================================