Compare commits
5 Commits
feature-de
...
4df796e977
| Author | SHA1 | Date | |
|---|---|---|---|
| 4df796e977 | |||
| a7e3f80eda | |||
| cefcc296fb | |||
| f5bea7cdc2 | |||
| 5144da4680 |
@@ -40,9 +40,9 @@ DB_USER="${POSTGRES_USER:-hoafinance}"
|
||||
DB_NAME="${POSTGRES_DB:-hoafinance}"
|
||||
MIGRATION_DIR="$PROJECT_DIR/db/migrations"
|
||||
HEALTH_URL="http://localhost:3000/api"
|
||||
HEALTH_RETRIES=36
|
||||
HEALTH_RETRIES=20
|
||||
HEALTH_INTERVAL=5
|
||||
HEALTH_START_WAIT=10
|
||||
HEALTH_START_WAIT=30
|
||||
LOG_DIR="$PROJECT_DIR/logs"
|
||||
LOG_FILE="$LOG_DIR/deploy-$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
@@ -359,47 +359,38 @@ fi
|
||||
# ====================================================================
|
||||
echo ""
|
||||
log "--- Step 5/6: Verifying application health ---"
|
||||
|
||||
# After a fresh image build, NestJS cold-start can take 2-3 minutes:
|
||||
# New Relic init → TypeORM connections → Redis → BullMQ → NestJS bootstrap
|
||||
# Docker's own healthcheck (start_period:30s + 3×15s retries = ~75s) is too
|
||||
# aggressive and will mark the container "unhealthy" before the app finishes
|
||||
# booting. So we do NOT rely on Docker's health status — we probe the HTTP
|
||||
# endpoint directly from the host and give it up to ~3 minutes total.
|
||||
TOTAL_WAIT=$((HEALTH_START_WAIT + HEALTH_RETRIES * HEALTH_INTERVAL))
|
||||
log "Will wait up to ${TOTAL_WAIT}s for backend to respond at $HEALTH_URL ..."
|
||||
log "Waiting ${HEALTH_START_WAIT}s for backend to initialize (matches Docker start_period) ..."
|
||||
sleep "$HEALTH_START_WAIT"
|
||||
|
||||
# Primary check: Docker's own container health status
|
||||
# (docker-compose.prod.yml already defines a healthcheck using wget inside the container)
|
||||
HEALTHY=false
|
||||
for ((i=1; i<=HEALTH_RETRIES; i++)); do
|
||||
# Direct HTTP check from the host using wget (available on Ubuntu)
|
||||
CONTAINER_HEALTH=$($COMPOSE_CMD ps backend --format '{{.Health}}' 2>/dev/null || echo "unknown")
|
||||
if [ "$CONTAINER_HEALTH" = "healthy" ]; then
|
||||
HEALTHY=true
|
||||
break
|
||||
fi
|
||||
|
||||
# Also try a direct HTTP check from the host as a secondary signal
|
||||
# Use wget (available on Ubuntu) since curl may not be installed
|
||||
if wget -qO- --timeout=5 "$HEALTH_URL" >/dev/null 2>&1; then
|
||||
HEALTHY=true
|
||||
break
|
||||
fi
|
||||
|
||||
# Also check Docker's container health for informational logging
|
||||
CONTAINER_HEALTH=$($COMPOSE_CMD ps backend --format '{{.Health}}' 2>/dev/null || echo "unknown")
|
||||
|
||||
# If the container exited or was removed, fail immediately — no point waiting
|
||||
CONTAINER_STATUS=$($COMPOSE_CMD ps backend --format '{{.Status}}' 2>/dev/null || echo "unknown")
|
||||
if echo "$CONTAINER_STATUS" | grep -qi "exit\|dead\|removed"; then
|
||||
err "Backend container has stopped unexpectedly: $CONTAINER_STATUS"
|
||||
break
|
||||
fi
|
||||
|
||||
log " Health check attempt $i/$HEALTH_RETRIES — docker: ${CONTAINER_HEALTH}, retrying in ${HEALTH_INTERVAL}s ..."
|
||||
log " Health check attempt $i/$HEALTH_RETRIES — container status: ${CONTAINER_HEALTH}, retrying in ${HEALTH_INTERVAL}s ..."
|
||||
sleep "$HEALTH_INTERVAL"
|
||||
done
|
||||
|
||||
if [ "$HEALTHY" = true ]; then
|
||||
ok "Backend is healthy and responding at $HEALTH_URL"
|
||||
ok "Backend is healthy and responding"
|
||||
else
|
||||
# Log diagnostics before triggering rollback
|
||||
err "Backend failed to respond after ${TOTAL_WAIT}s"
|
||||
err "Backend failed to respond after $((HEALTH_START_WAIT + HEALTH_RETRIES * HEALTH_INTERVAL))s"
|
||||
warn "Container status: $($COMPOSE_CMD ps backend 2>/dev/null || echo 'unknown')"
|
||||
warn "Recent backend logs:"
|
||||
$COMPOSE_CMD logs --tail=30 backend 2>/dev/null || true
|
||||
$COMPOSE_CMD logs --tail=20 backend 2>/dev/null || true
|
||||
err "Triggering automatic rollback ..."
|
||||
exit 1 # trap will handle rollback
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user