#!/usr/bin/env bash
# =============================================================================
# run-integration.sh — Staging VM Integration Test Suite
#
# Verifies Docker daemon, compose stack, and service endpoint health.
# Designed to run inside the staging VM as part of CI/CD pipeline.
#
# Usage:
#   ./tests/run-integration.sh                   # all defaults
#   ./tests/run-integration.sh --verbose         # detailed output
#   ./tests/run-integration.sh --list-services   # print configured services and exit
#
# Environment variables (all optional):
#   COMPOSE_DIR      Path to compose service directories   (default: /opt/infra/compose)
#   COMPOSE_PROJECT  Docker Compose project name           (default: staging)
#   STAGING_DOMAIN   Base domain for health checks         (default: staging.lazyworkhorse.net)
#   SERVICE_LIST     Space-separated service dirs to check (default: built-in list below)
#   HEALTH_URLS      Space-separated service=URL pairs, e.g.
#                    "network=https://... homepage=https://..."
#                    (default: built-in map, restricted to SERVICE_LIST)
#   HEALTH_TIMEOUT   Curl timeout per check (seconds)      (default: 5)
#   HEALTH_RETRIES   Number of retries per endpoint        (default: 1)
#   HEALTH_INTERVAL  Seconds between retries               (default: 2)
#
# Exit status:
#   0  no check failed
#   1  at least one check failed
#   2  invalid configuration
# =============================================================================
set -euo pipefail

# ---- Colors for readable output ----
# Stored as real escape bytes so they can be emitted with printf '%s' and the
# message text itself is never subject to backslash interpretation.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly CYAN=$'\033[0;36m'
readonly BOLD=$'\033[1m'
readonly NC=$'\033[0m' # No Color

# ---- Configuration (all env-overridable) ----
COMPOSE_DIR="${COMPOSE_DIR:-/opt/infra/compose}"
COMPOSE_PROJECT="${COMPOSE_PROJECT:-staging}"
STAGING_DOMAIN="${STAGING_DOMAIN:-staging.lazyworkhorse.net}"
HEALTH_TIMEOUT="${HEALTH_TIMEOUT:-5}"
HEALTH_RETRIES="${HEALTH_RETRIES:-1}"
HEALTH_INTERVAL="${HEALTH_INTERVAL:-2}"

# HEALTH_RETRIES feeds shell arithmetic, so reject anything that is not a
# plain non-negative integer instead of failing later with an obscure error.
if [[ ! "$HEALTH_RETRIES" =~ ^[0-9]+$ ]]; then
  printf 'HEALTH_RETRIES must be a non-negative integer (got: %s)\n' \
    "$HEALTH_RETRIES" >&2
  exit 2
fi

# Known compose service directories in order — override via SERVICE_LIST env var
DEFAULT_SERVICES=(
  network
  authentification
  homepage
  ai
  cloudstorage
  versioncontrol
  backup
  coms
  finance
  homeautomation
  passwordmanager
)

# Map service directory -> default health check URL (built from STAGING_DOMAIN).
# Override entirely via HEALTH_URLS env var.
declare -A DEFAULT_HEALTH_URLS=()
DEFAULT_HEALTH_URLS[network]="https://traefik.${STAGING_DOMAIN}/ping"
DEFAULT_HEALTH_URLS[authentification]="https://auth.${STAGING_DOMAIN}/api/verify"
DEFAULT_HEALTH_URLS[homepage]="https://${STAGING_DOMAIN}/"
DEFAULT_HEALTH_URLS[ai]="https://hermes.${STAGING_DOMAIN}/health"
DEFAULT_HEALTH_URLS[cloudstorage]="https://cloud.${STAGING_DOMAIN}/status.php"
DEFAULT_HEALTH_URLS[versioncontrol]="https://code.${STAGING_DOMAIN}/api/healthz"

# A container status counts as running when it starts with one of these words
# (compared case-insensitively) and does not report itself as unhealthy.
readonly RUNNING_STATUS_RE='^(up|running|healthy)'

# ---- Trackers ----
PASS_COUNT=0
FAIL_COUNT=0
WARN_COUNT=0
FAILURES=()
LAST_HTTP_CODE="000" # written by check_endpoint

# ---- Helpers ----
# The counters use VAR=$((VAR + 1)) rather than ((VAR++)): the latter returns
# status 1 when the old value is 0, which aborts the script under `set -e`.

# Print an informational line.
log_info() { printf '%s[INFO]%s %s\n' "$CYAN" "$NC" "$*"; }

# Print a PASS line and count it.
log_pass() {
  printf '%s[PASS]%s %s\n' "$GREEN" "$NC" "$*"
  PASS_COUNT=$((PASS_COUNT + 1))
}

# Print a FAIL line, count it and remember the message for the summary.
log_fail() {
  printf '%s[FAIL]%s %s\n' "$RED" "$NC" "$*"
  FAIL_COUNT=$((FAIL_COUNT + 1))
  FAILURES+=("$*")
}

# Print a WARN line and count it.
log_warn() {
  printf '%s[WARN]%s %s\n' "$YELLOW" "$NC" "$*"
  WARN_COUNT=$((WARN_COUNT + 1))
}

# Print a phase heading preceded by a blank line.
log_step() { printf '\n%s── %s ──%s\n' "$BOLD" "$*" "$NC"; }

# Print an indented line verbatim (used for raw command output).
log_raw() { printf '  %s\n' "$*"; }

# Print the framed banner with a title ($1) and the current UTC timestamp.
print_banner() {
  local rule='============================================'
  printf '%s%s%s\n' "$BOLD" "$rule" "$NC"
  printf '%s  %s%s\n' "$BOLD" "$1" "$NC"
  printf '%s  %s%s\n' "$BOLD" "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$NC"
  printf '%s%s%s\n' "$BOLD" "$rule" "$NC"
}

# Check if a command exists.
# Arguments: $1 - command name
# Returns:   0 if found; otherwise logs a failure and returns 1
require_cmd() {
  if ! command -v "$1" &>/dev/null; then
    log_fail "Required command not found: $1"
    return 1
  fi
}

# Run a command until it succeeds, at most HEALTH_RETRIES + 1 times, sleeping
# HEALTH_INTERVAL seconds between attempts (fixed interval, no backoff).
# Arguments: "$@" - command and its arguments, executed directly (no eval)
# Returns:   0 as soon as one attempt succeeds, 1 if every attempt failed
retry() {
  local attempt=0
  local max_attempts=$((10#$HEALTH_RETRIES + 1)) # 10# avoids octal parsing of "08"
  while ((attempt < max_attempts)); do
    if "$@"; then
      return 0
    fi
    attempt=$((attempt + 1))
    if ((attempt < max_attempts)); then
      sleep "$HEALTH_INTERVAL"
    fi
  done
  return 1
}

# Issue one HTTP request against a URL and record the status code.
# Globals:   HEALTH_TIMEOUT (read), LAST_HTTP_CODE (written; "000" if no code)
# Arguments: $1 - URL
# Returns:   0 if curl succeeded (with -f that means HTTP status < 400), else 1
check_endpoint() {
  local url=$1
  local code
  # -w still prints the status code when -f makes curl exit non-zero, so one
  # request yields both the verdict and the code to display.
  if code=$(curl -sf -o /dev/null -w '%{http_code}' \
    --max-time "$HEALTH_TIMEOUT" "$url" 2>/dev/null); then
    LAST_HTTP_CODE=$code
    return 0
  fi
  LAST_HTTP_CODE=${code:-000}
  return 1
}

# ---- Parse arguments ----
VERBOSE=false
LIST_SERVICES=false
POSITIONAL=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    --verbose | -v)
      VERBOSE=true
      shift
      ;;
    --list-services)
      LIST_SERVICES=true
      shift
      ;;
    --)
      shift
      POSITIONAL+=("$@")
      break
      ;;
    *)
      POSITIONAL+=("$1")
      shift
      ;;
  esac
done
# The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
# bash versions older than 4.4.
set -- ${POSITIONAL[@]+"${POSITIONAL[@]}"}

# ---- Resolve service list ----
SERVICES=()
if [[ -n "${SERVICE_LIST:-}" ]]; then
  IFS=' ' read -ra SERVICES <<< "$SERVICE_LIST"
fi
if [[ ${#SERVICES[@]} -eq 0 ]]; then
  SERVICES=("${DEFAULT_SERVICES[@]}")
fi

# ---- Resolve health URLs — default map, or overrides from env ----
# Copy the env value aside and unset it first: re-declaring an existing scalar
# as an associative array would keep the raw string as a stray element.
HEALTH_URLS_SPEC="${HEALTH_URLS:-}"
unset HEALTH_URLS
declare -A HEALTH_URLS=()
if [[ -n "$HEALTH_URLS_SPEC" ]]; then
  # User-supplied mapping: "network=https://... authentification=https://..."
  # read -a splits on whitespace without glob-expanding URLs that contain
  # '?' or '*'; -d '' makes it consume every line, at the cost of a non-zero
  # status at end of input, hence the `|| true`.
  url_pairs=()
  read -r -d '' -a url_pairs <<< "$HEALTH_URLS_SPEC" || true
  for pair in ${url_pairs[@]+"${url_pairs[@]}"}; do
    if [[ "$pair" != ?*=* ]]; then
      log_warn "Ignoring malformed HEALTH_URLS entry (expected service=url): ${pair}"
      continue
    fi
    HEALTH_URLS["${pair%%=*}"]="${pair#*=}"
  done
else
  for svc in "${SERVICES[@]}"; do
    if [[ -n "${DEFAULT_HEALTH_URLS[$svc]:-}" ]]; then
      HEALTH_URLS["$svc"]="${DEFAULT_HEALTH_URLS[$svc]}"
    fi
  done
fi

# --list-services mode (for CI integration)
if $LIST_SERVICES; then
  echo "Configured services:"
  for svc in "${SERVICES[@]}"; do
    printf '  %s -> %s\n' "$svc" "${HEALTH_URLS[$svc]:-no-health-check}"
  done
  exit 0
fi

# ---- Pre-flight ----
print_banner "Staging VM Integration Test Suite"

# ---- Phase 1: Prerequisites ----
log_step "Phase 1: Prerequisites"

PREREQ_OK=true
for cmd in docker curl jq; do
  if ! require_cmd "$cmd"; then
    PREREQ_OK=false
  fi
done

if $PREREQ_OK; then
  log_pass "All required commands available"
else
  log_fail "Missing prerequisites"
fi

# ---- Phase 2: Docker daemon ----
log_step "Phase 2: Docker Daemon"

# An empty server version is treated like a failed call: no version means no
# reachable daemon.
if DOCKER_VERSION=$(docker info --format '{{.ServerVersion}}' 2>/dev/null) \
  && [[ -n "$DOCKER_VERSION" ]]; then
  log_pass "Docker daemon is running (version: $DOCKER_VERSION)"
  STORAGE_DRIVER=$(docker info --format '{{.Driver}}' 2>/dev/null || true)
  if [[ "${STORAGE_DRIVER,,}" == *overlay* ]]; then
    log_pass "Storage driver: overlay"
  else
    log_warn "Non-overlay storage driver detected"
  fi
else
  log_fail "Docker daemon is NOT running or not accessible"
fi

# ---- Phase 3: Docker Compose stack ----
log_step "Phase 3: Compose Stack Status"

# Collect the compose file of every service; compose.yml wins over
# docker-compose.yml when both exist.
COMPOSE_FILES=()
for svc in "${SERVICES[@]}"; do
  cf="${COMPOSE_DIR}/${svc}/compose.yml"
  cf2="${COMPOSE_DIR}/${svc}/docker-compose.yml"
  if [[ -f "$cf" ]]; then
    COMPOSE_FILES+=("$cf")
  elif [[ -f "$cf2" ]]; then
    COMPOSE_FILES+=("$cf2")
  else
    log_warn "No compose file found for service '$svc' (expected: ${cf})"
  fi
done

if [[ ${#COMPOSE_FILES[@]} -eq 0 ]]; then
  log_fail "No compose files found under COMPOSE_DIR=${COMPOSE_DIR}"
  log_info "Skipping stack checks"
else
  log_info "Found ${#COMPOSE_FILES[@]} compose file(s) in ${COMPOSE_DIR}"

  # Build the command as an array so paths containing spaces survive intact.
  COMPOSE_CMD=(docker compose -p "$COMPOSE_PROJECT")
  for cf in "${COMPOSE_FILES[@]}"; do
    COMPOSE_CMD+=(-f "$cf")
  done

  log_info "Project name: ${COMPOSE_PROJECT}"

  # Check stack ps
  if $VERBOSE; then
    log_raw "--- docker compose ps output ---"
    # Display only: a failing `ps` must not abort the run, the real check
    # follows below.
    while IFS= read -r line; do
      log_raw "$line"
    done < <("${COMPOSE_CMD[@]}" ps 2>&1 || true)
    log_raw "--- end ---"
  fi

  # Get all services and their status
  if STACK_STATUS=$("${COMPOSE_CMD[@]}" ps \
    --format '{{.Name}}\t{{.Status}}' 2>/dev/null); then
    if [[ -z "$STACK_STATUS" ]]; then
      log_warn "Stack exists but no running services — VM may be freshly provisioned"
    else
      RUNNING_COUNT=0
      TOTAL_COUNT=0
      while IFS=$'\t' read -r name status; do
        [[ -n "$name" ]] || continue
        TOTAL_COUNT=$((TOTAL_COUNT + 1))
        status_lower=${status,,}
        # "Up 5 minutes (unhealthy)" starts with "up" but is not healthy.
        if [[ "$status_lower" =~ $RUNNING_STATUS_RE \
          && "$status_lower" != *unhealthy* ]]; then
          RUNNING_COUNT=$((RUNNING_COUNT + 1))
          if $VERBOSE; then
            log_pass "  $name — $status"
          fi
        else
          log_warn "  $name — $status (not healthy)"
        fi
      done <<< "$STACK_STATUS"

      if [[ "$TOTAL_COUNT" -eq 0 ]]; then
        log_fail "No services found in compose project"
      elif [[ "$TOTAL_COUNT" -eq "$RUNNING_COUNT" ]]; then
        log_pass "All ${TOTAL_COUNT} service(s) running (${RUNNING_COUNT}/${TOTAL_COUNT})"
      else
        log_fail "${RUNNING_COUNT}/${TOTAL_COUNT} service(s) running — some services are down"
      fi
    fi
  else
    log_fail "Failed to query compose stack status"
  fi
fi

# ---- Phase 4: Service health checks ----
log_step "Phase 4: Service Endpoint Health Checks"

ENDPOINT_CHECKS=0
ENDPOINT_PASS=0

for svc in "${SERVICES[@]}"; do
  url="${HEALTH_URLS[$svc]:-}"
  if [[ -z "$url" ]]; then
    if $VERBOSE; then
      log_info "No health check URL for service '$svc' — skipping"
    fi
    continue
  fi

  ENDPOINT_CHECKS=$((ENDPOINT_CHECKS + 1))
  printf '  Checking %s ... ' "$svc"

  # Perform the HTTP health check with retries; the status code shown is the
  # one from the last attempt (no extra request is made to obtain it).
  if retry check_endpoint "$url"; then
    ENDPOINT_PASS=$((ENDPOINT_PASS + 1))
    printf '%sOK%s (HTTP %s)\n' "$GREEN" "$NC" "$LAST_HTTP_CODE"
  else
    printf '%sFAIL%s (HTTP %s)\n' "$RED" "$NC" "$LAST_HTTP_CODE"
    log_fail "Health check failed for ${svc} @ ${url}"
  fi
done

if [[ $ENDPOINT_CHECKS -eq 0 ]]; then
  log_warn "No health check URLs configured — skipping endpoint phase"
elif [[ $ENDPOINT_PASS -eq $ENDPOINT_CHECKS ]]; then
  log_pass "All ${ENDPOINT_CHECKS} endpoint(s) healthy"
else
  log_fail "${ENDPOINT_PASS}/${ENDPOINT_CHECKS} endpoint(s) healthy"
fi

# ---- Phase 5: Docker system sanity ----
log_step "Phase 5: Docker System Sanity"

# Check disk space for Docker
DOCKER_ROOT=$(docker info --format '{{.DockerRootDir}}' 2>/dev/null) || DOCKER_ROOT=""
DOCKER_ROOT=${DOCKER_ROOT:-/var/lib/docker}
log_info "Docker root: ${DOCKER_ROOT}"

if command -v df &>/dev/null && [[ -d "$DOCKER_ROOT" ]]; then
  # -P keeps each filesystem on a single line, so column 5 (use%) of line 2 is
  # stable even when the device name is long.
  USED_PCT=$(df -P "$DOCKER_ROOT" 2>/dev/null \
    | awk 'NR==2 { sub(/%/, "", $5); print $5 }') || USED_PCT=""
  if [[ "$USED_PCT" =~ ^[0-9]+$ ]]; then
    if [[ "$USED_PCT" -ge 90 ]]; then
      log_warn "Docker storage is ${USED_PCT}% full — consider cleanup"
    else
      log_pass "Docker storage at ${USED_PCT}% — within limits"
    fi
  fi
fi

# Check for dangling images. A failing docker call yields an empty list rather
# than aborting the script before the summary is printed.
DANGLING_IDS=()
mapfile -t DANGLING_IDS < <(docker images -f "dangling=true" -q 2>/dev/null || true)
DANGLING=${#DANGLING_IDS[@]}
if [[ "$DANGLING" -gt 10 ]]; then
  log_warn "${DANGLING} dangling images found — consider docker image prune"
fi

# ---- Summary ----
echo ""
print_banner "Test Summary"
printf '  %sPassed:%s   %s\n' "$GREEN" "$NC" "$PASS_COUNT"
printf '  %sFailed:%s   %s\n' "$RED" "$NC" "$FAIL_COUNT"
printf '  %sWarnings:%s %s\n' "$YELLOW" "$NC" "$WARN_COUNT"

if [[ ${#FAILURES[@]} -gt 0 ]]; then
  printf '\n%sFailed checks:%s\n' "$BOLD" "$NC"
  for f in "${FAILURES[@]}"; do
    printf '  %s•%s %s\n' "$RED" "$NC" "$f"
  done
fi

echo ""
if [[ $FAIL_COUNT -eq 0 ]]; then
  printf '%s%s✓ All integration checks passed%s\n' "$GREEN" "$BOLD" "$NC"
  exit 0
else
  printf '%s%s✗ %s integration check(s) failed%s\n' "$RED" "$BOLD" "$FAIL_COUNT" "$NC"
  exit 1
fi