fix: replace curl with Docker health status and wget for health check

The health check used curl, which is not installed on the prod server. Replace it with a dual approach: (1) Primary: check Docker's own container health status (the healthcheck in docker-compose.prod.yml already runs wget inside the container); (2) Secondary: wget from the host as a fallback signal. Also add diagnostic logging (container status + recent backend logs) before triggering rollback on health check failure. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-09 09:22:28 -04:00
parent 2aad137bd7
commit 3e7463cf46
1 changed file with 19 additions and 11 deletions
--- a/scripts/deploy-prod.sh
+++ b/scripts/deploy-prod.sh
@@ -359,34 +359,42 @@ fi
 # ====================================================================
 echo ""
 # NOTE(review): this step is labelled "5/6", but a later banner in this script
 # reads "STEP 7" — confirm the step numbering is consistent.
 log "--- Step 5/6: Verifying application health ---"
 # Pause for HEALTH_START_WAIT seconds before the first health probe so the
 # backend has time to start.
-log "Waiting ${HEALTH_START_WAIT}s for backend to initialize ..."
+log "Waiting ${HEALTH_START_WAIT}s for backend to initialize (matches Docker start_period) ..."
 sleep "$HEALTH_START_WAIT"
 # Primary check: Docker's own container health status
 # (docker-compose.prod.yml already defines a healthcheck using wget inside the container)
 # The loop polls up to HEALTH_RETRIES times, sleeping HEALTH_INTERVAL seconds
 # between attempts; either signal below is enough to set HEALTHY=true.
 HEALTHY=false
 for ((i=1; i<=HEALTH_RETRIES; i++)); do
-  if curl -sf "$HEALTH_URL" >/dev/null 2>&1; then
+  CONTAINER_HEALTH=$($COMPOSE_CMD ps backend --format '{{.Health}}' 2>/dev/null || echo "unknown")
  # Treat empty output like a failed query so the retry log line below never
  # shows a blank container status.
  CONTAINER_HEALTH=${CONTAINER_HEALTH:-unknown}
  if [ "$CONTAINER_HEALTH" = "healthy" ]; then
    HEALTHY=true
    break
  fi
-  log "  Health check attempt $i/$HEALTH_RETRIES failed, retrying in ${HEALTH_INTERVAL}s ..."
+
  # Also try a direct HTTP check from the host as a secondary signal
  # Use wget (available on Ubuntu) since curl may not be installed
  # --tries=1: GNU wget retries a timed-out request up to 20 times by default,
  # which could stall a single iteration for minutes; this loop already
  # provides the retry cadence.
  if wget -qO- --tries=1 --timeout=5 "$HEALTH_URL" >/dev/null 2>&1; then
    HEALTHY=true
    break
  fi
  log "  Health check attempt $i/$HEALTH_RETRIES — container status: ${CONTAINER_HEALTH}, retrying in ${HEALTH_INTERVAL}s ..."
  sleep "$HEALTH_INTERVAL"
 done
 # Report the outcome of the polling loop: a success message when HEALTHY is
 # true, otherwise print diagnostics and exit non-zero.
 if [ "$HEALTHY" = true ]; then
-  ok "Backend is healthy and responding at $HEALTH_URL"
+  ok "Backend is healthy and responding"
 else
  # Log diagnostics before triggering rollback
  # Reported duration = initial wait + one HEALTH_INTERVAL per retry.
  err "Backend failed to respond after $((HEALTH_START_WAIT + HEALTH_RETRIES * HEALTH_INTERVAL))s"
  warn "Container status: $($COMPOSE_CMD ps backend 2>/dev/null || echo 'unknown')"
  warn "Recent backend logs:"
  # Best-effort: a failure to fetch logs is ignored (|| true) so it cannot
  # pre-empt the messages and exit below.
  $COMPOSE_CMD logs --tail=20 backend 2>/dev/null || true
  err "Triggering automatic rollback ..."
  exit 1  # trap will handle rollback
 fi
 # Also verify the container reports healthy via Docker.
 # Match "healthy" as a whole word (-w): a plain substring match would also
 # accept "unhealthy" and report a failing container as healthy.
 if $COMPOSE_CMD ps backend 2>/dev/null | grep -qw "healthy"; then
  ok "Backend container health check: healthy"
 else
  warn "Backend container health status is not 'healthy' yet (may still be within start_period)"
 fi
 # ====================================================================
 #  STEP 7: Post-upgrade database backup
 # ====================================================================