Some checks failed
Build and test NixOS config / build (pull_request) Has been cancelled
Replace the stub placeholder with a comprehensive integration test script that verifies Docker daemon, compose stack, and service endpoint health. All configuration via environment variables with sensible defaults. Changes: - tests/run-integration.sh: 5-phase test suite with color output, retry logic, env-var configuration, and CI-friendly exit codes - .gitea/workflows/build-nixos.yml: update CI step to document pr-test-vm usage with the new test script See also: pr-test-vm helper in modules/nixos/services/staging-vm.nix
348 lines
11 KiB
Bash
Executable File
348 lines
11 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# =============================================================================
|
|
# run-integration.sh — Staging VM Integration Test Suite
|
|
#
|
|
# Verifies Docker daemon, compose stack, and service endpoint health.
|
|
# Designed to run inside the staging VM as part of CI/CD pipeline.
|
|
#
|
|
# Usage:
|
|
# ./tests/run-integration.sh # all defaults
|
|
# ./tests/run-integration.sh --verbose # detailed output
|
|
# ./tests/run-integration.sh --list-services # print detected services and exit
|
|
#
|
|
# Environment variables (all optional):
|
|
# COMPOSE_DIR Path to compose service directories (default: /opt/infra/compose)
|
|
# COMPOSE_PROJECT Docker Compose project name (default: staging)
|
|
# STAGING_DOMAIN Base domain for health checks (default: staging.lazyworkhorse.net)
|
|
# SERVICE_LIST Space-separated service dirs to check (default: built-in DEFAULT_SERVICES list)
|
|
# HEALTH_URLS Space-separated service=URL pairs for health checks, e.g. "network=https://... homepage=https://..." (default: built-in URL for each service in SERVICE_LIST that has one)
|
|
# HEALTH_TIMEOUT Curl timeout per check (seconds) (default: 5)
|
|
# HEALTH_RETRIES Number of retries per endpoint (default: 1)
|
|
# HEALTH_INTERVAL Seconds between retries (default: 2)
|
|
# =============================================================================
|
|
|
|
set -euo pipefail

# ---- ANSI color escapes (stored literally; rendered later by `echo -e`) ----
RED='\033[0;31m' GREEN='\033[0;32m'
YELLOW='\033[1;33m' CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m' # resets all attributes

# ---- Configuration ----
# Each setting keeps a non-empty value inherited from the environment and
# otherwise takes the default written here.
: "${COMPOSE_DIR:=/opt/infra/compose}"
: "${COMPOSE_PROJECT:=staging}"
: "${STAGING_DOMAIN:=staging.lazyworkhorse.net}"
: "${HEALTH_TIMEOUT:=5}"
: "${HEALTH_RETRIES:=1}"
: "${HEALTH_INTERVAL:=2}"
|
|
|
|
# Compose service directories checked when SERVICE_LIST is not set, in the
# order they are processed.
DEFAULT_SERVICES=(
  network authentification homepage
  ai cloudstorage versioncontrol
  backup coms finance
  homeautomation passwordmanager
)
|
|
|
|
# Default health-check URL for each service directory, built on STAGING_DOMAIN.
# Services without an entry here get no default endpoint check.
# A non-empty HEALTH_URLS environment variable replaces these defaults.
declare -A DEFAULT_HEALTH_URLS
DEFAULT_HEALTH_URLS+=(
  [network]="https://traefik.${STAGING_DOMAIN}/ping"
  [authentification]="https://auth.${STAGING_DOMAIN}/api/verify"
  [homepage]="https://${STAGING_DOMAIN}/"
  [ai]="https://hermes.${STAGING_DOMAIN}/health"
  [cloudstorage]="https://cloud.${STAGING_DOMAIN}/status.php"
  [versioncontrol]="https://code.${STAGING_DOMAIN}/api/healthz"
)
|
|
|
|
# ---- Trackers ----
# Running totals shown in the final summary. FAILURES keeps the message of
# every failed check so the summary can list them.
PASS_COUNT=0
FAIL_COUNT=0
WARN_COUNT=0
FAILURES=()

# ---- Helpers ----
# Logging helpers: each prints one tagged, colorized line to stdout.
# log_pass / log_fail / log_warn additionally bump their counter.
#
# Counters use VAR=$((VAR + 1)) instead of ((VAR++)): the post-increment form
# returns exit status 1 whenever the previous value was 0, which under
# `set -e` kills the script on the first call and makes
# `cond && log_pass ... || log_fail ...` run both branches.

log_info() { echo -e "${CYAN}[INFO]${NC} $*"; }

log_pass() {
  echo -e "${GREEN}[PASS]${NC} $*"
  PASS_COUNT=$((PASS_COUNT + 1))
}

log_fail() {
  echo -e "${RED}[FAIL]${NC} $*"
  FAIL_COUNT=$((FAIL_COUNT + 1))
  FAILURES+=("$*")
}

log_warn() {
  echo -e "${YELLOW}[WARN]${NC} $*"
  WARN_COUNT=$((WARN_COUNT + 1))
}

# Section heading, preceded by a blank line.
log_step() { echo -e "\n${BOLD}── $* ──${NC}"; }

# Unlabelled line with a one-space indent (used for pass-through output).
log_raw() { echo -e " $*"; }
|
|
|
|
# Verify that a command is available.
# Arguments: $1 - command name to look up
# Returns:   0 when the command resolves; otherwise records the failure via
#            log_fail and returns 1.
require_cmd() {
  command -v "$1" &>/dev/null && return 0
  log_fail "Required command not found: $1"
  return 1
}
|
|
|
|
# Run a command until it succeeds, sleeping a fixed HEALTH_INTERVAL seconds
# between attempts (the delay does not grow). Attempts = HEALTH_RETRIES + 1.
#
# Arguments:
#   one argument   - a shell command string, run with `eval` in the current
#                    shell (the form the health-check phase uses). Quote any
#                    interpolated data (printf '%q') before building it.
#   several args   - an argv that is executed as-is, so every argument keeps
#                    its boundaries and is never re-parsed by the shell.
# Globals:  HEALTH_RETRIES, HEALTH_INTERVAL (read)
# Outputs:  the command's stdout; its stderr is discarded.
# Returns:  0 as soon as an attempt succeeds, 1 once all attempts have failed.
retry() {
  local attempt
  local max_attempts=$((HEALTH_RETRIES + 1))

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if (($# == 1)); then
      if eval "$1" 2>/dev/null; then
        return 0
      fi
    elif "$@" 2>/dev/null; then
      return 0
    fi

    # No pause after the final attempt.
    if ((attempt < max_attempts)); then
      sleep "$HEALTH_INTERVAL"
    fi
  done
  return 1
}
|
|
|
|
# ---- Parse arguments ----
# --verbose/-v and --list-services set their flags. Every other argument, and
# everything after a literal `--`, is collected and restored as the
# positional parameters once parsing is done.
VERBOSE=false
LIST_SERVICES=false
POSITIONAL=()
while (($# > 0)); do
  case "$1" in
    -v | --verbose)
      VERBOSE=true
      ;;
    --list-services)
      LIST_SERVICES=true
      ;;
    --)
      shift
      POSITIONAL+=("$@")
      break
      ;;
    *)
      POSITIONAL+=("$1")
      ;;
  esac
  shift
done
# The ${arr[@]+...} guard matters on bash 4.0-4.3, where expanding an empty
# array under `set -u` is an "unbound variable" error and would abort the
# script whenever no positional arguments were given.
set -- ${POSITIONAL[@]+"${POSITIONAL[@]}"}
|
|
|
|
# Resolve service list: a non-empty SERVICE_LIST (split on spaces) takes
# precedence; without it the built-in DEFAULT_SERVICES are used.
if [[ -z "${SERVICE_LIST:-}" ]]; then
  SERVICES=("${DEFAULT_SERVICES[@]}")
else
  IFS=' ' read -r -a SERVICES <<<"$SERVICE_LIST"
fi
|
|
|
|
# Resolve health URLs into the HEALTH_URLS associative array (service -> URL).
#
# HEALTH_URLS may arrive from the environment as a string of
# whitespace-separated "service=url" pairs, e.g.
#   HEALTH_URLS="network=https://... authentification=https://..."
# When it is non-empty it replaces the defaults entirely; otherwise every
# service in SERVICES gets its DEFAULT_HEALTH_URLS entry, if it has one.

# Fill HEALTH_URLS from a "service=url ..." string.
# Arguments: $1 - raw pairs string
# Globals:   HEALTH_URLS (written; must already be an associative array)
# Entries lacking "=", or with an empty service or URL, are reported on
# stderr and skipped instead of producing meaningless map keys.
load_health_url_overrides() {
  local raw=$1 pair
  local -a pairs=()
  # read -a splits on whitespace WITHOUT pathname expansion, so "?" and "*"
  # inside URLs are kept literally. With -d '' read returns non-zero at EOF.
  IFS=$' \t\n' read -r -d '' -a pairs <<<"$raw" || true
  for pair in ${pairs[@]+"${pairs[@]}"}; do
    if [[ "$pair" != ?*=?* ]]; then
      echo "Ignoring malformed HEALTH_URLS entry (expected service=url): $pair" >&2
      continue
    fi
    # Split on the first "=" only; URLs may contain further "=" characters.
    HEALTH_URLS["${pair%%=*}"]="${pair#*=}"
  done
}

# Save the environment string first: the same name is re-declared as an
# associative array below, and converting it in place would leave the raw
# string behind under key "0".
HEALTH_URLS_RAW="${HEALTH_URLS:-}"
unset HEALTH_URLS
declare -A HEALTH_URLS=()

if [[ -n "$HEALTH_URLS_RAW" ]]; then
  load_health_url_overrides "$HEALTH_URLS_RAW"
else
  for svc in "${SERVICES[@]}"; do
    if [[ -n "${DEFAULT_HEALTH_URLS[$svc]:-}" ]]; then
      HEALTH_URLS["$svc"]="${DEFAULT_HEALTH_URLS[$svc]}"
    fi
  done
fi
|
|
|
|
# --list-services mode (for CI integration): print every configured service
# with its health-check URL, or "no-health-check" when it has none, then exit
# successfully without running any checks.
if $LIST_SERVICES; then
  printf '%s\n' "Configured services:"
  for service in "${SERVICES[@]}"; do
    printf ' %s -> %s\n' "$service" "${HEALTH_URLS[$service]:-no-health-check}"
  done
  exit 0
fi
|
|
|
|
# ---- Pre-flight ----
# Opening banner: suite title plus the current UTC timestamp.
BANNER_RULE="${BOLD}============================================${NC}"
echo -e "$BANNER_RULE"
echo -e "${BOLD} Staging VM Integration Test Suite${NC}"
echo -e "${BOLD} $(date -u '+%Y-%m-%dT%H:%M:%SZ')${NC}"
echo -e "$BANNER_RULE"
|
|
|
|
# ---- Phase 1: Prerequisites ----
log_step "Phase 1: Prerequisites"

# Check every tool rather than stopping at the first miss, so each missing
# command is reported (require_cmd logs the individual failure).
PREREQ_OK=true
for cmd in docker curl jq; do
  require_cmd "$cmd" || PREREQ_OK=false
done

# Explicit if/else instead of `cond && log_pass ... || log_fail ...`: in that
# form log_fail also runs whenever log_pass itself returns non-zero, which
# would report a bogus "Missing prerequisites" failure.
if $PREREQ_OK; then
  log_pass "All required commands available"
else
  log_fail "Missing prerequisites"
fi
|
|
|
|
# ---- Phase 2: Docker daemon ----
log_step "Phase 2: Docker Daemon"

# `docker info` succeeding is taken as proof that the daemon is reachable.
if ! docker info --format '{{.ServerVersion}}' &>/dev/null; then
  log_fail "Docker daemon is NOT running or not accessible"
else
  DOCKER_VERSION=$(docker info --format '{{.ServerVersion}}' 2>/dev/null)
  log_pass "Docker daemon is running (version: $DOCKER_VERSION)"

  # Any driver name containing "overlay" (case-insensitive) passes; anything
  # else only produces a warning.
  if ! docker info --format '{{.Driver}}' 2>/dev/null | grep -qi "overlay"; then
    log_warn "Non-overlay storage driver detected"
  else
    log_pass "Storage driver: overlay"
  fi
fi
|
|
|
|
# ---- Phase 3: Docker Compose stack ----
log_step "Phase 3: Compose Stack Status"

# Collect one compose file per service directory: compose.yml is preferred,
# docker-compose.yml is the fallback, and a service with neither gets a
# warning that names the preferred path.
COMPOSE_FILES=()
for svc in "${SERVICES[@]}"; do
  svc_dir="${COMPOSE_DIR}/${svc}"
  if [[ -f "${svc_dir}/compose.yml" ]]; then
    COMPOSE_FILES+=("${svc_dir}/compose.yml")
  elif [[ -f "${svc_dir}/docker-compose.yml" ]]; then
    COMPOSE_FILES+=("${svc_dir}/docker-compose.yml")
  else
    log_warn "No compose file found for service '$svc' (expected: ${svc_dir}/compose.yml)"
  fi
done
|
|
|
|
# Decide whether a `docker compose ps` status string counts as running.
# Arguments: $1 - status text, e.g. "Up 2 hours (healthy)"
# Returns:   0 when the text starts with up/running/healthy
#            (case-insensitive) and does not report "unhealthy";
#            1 otherwise. Without the second test, "Up 5 minutes (unhealthy)"
#            would be counted as running.
compose_status_ok() {
  local status_lower=${1,,}
  [[ "$status_lower" =~ ^(up|running|healthy) ]] || return 1
  [[ "$status_lower" != *unhealthy* ]]
}

if [[ ${#COMPOSE_FILES[@]} -eq 0 ]]; then
  log_fail "No compose files found under COMPOSE_DIR=${COMPOSE_DIR}"
  log_info "Skipping stack checks"
else
  log_info "Found ${#COMPOSE_FILES[@]} compose file(s) in ${COMPOSE_DIR}"

  # Build the command as an array (not a string for eval) so a project name
  # or compose path containing spaces or shell metacharacters stays intact.
  COMPOSE_CMD=(docker compose -p "$COMPOSE_PROJECT")
  for cf in "${COMPOSE_FILES[@]}"; do
    COMPOSE_CMD+=(-f "$cf")
  done

  log_info "Project name: ${COMPOSE_PROJECT}"

  if $VERBOSE; then
    log_raw "--- docker compose ps output ---"
    # `|| true`: this listing is informational only. Under set -e + pipefail
    # a failing `ps` would otherwise abort the whole run right here; the
    # status query below reports that failure properly.
    "${COMPOSE_CMD[@]}" ps 2>&1 | while IFS= read -r line; do log_raw "$line"; done || true
    log_raw "--- end ---"
  fi

  # One "name<TAB>status" line per container.
  if STACK_STATUS=$("${COMPOSE_CMD[@]}" ps --format '{{.Name}}\t{{.Status}}' 2>/dev/null); then
    if [[ -z "$STACK_STATUS" ]]; then
      log_warn "Stack exists but no running services — VM may be freshly provisioned"
    else
      ALL_RUNNING=true
      RUNNING_COUNT=0
      TOTAL_COUNT=0
      while IFS=$'\t' read -r name status; do
        TOTAL_COUNT=$((TOTAL_COUNT + 1))
        if compose_status_ok "$status"; then
          RUNNING_COUNT=$((RUNNING_COUNT + 1))
          if $VERBOSE; then
            log_pass " $name — $status"
          fi
        else
          ALL_RUNNING=false
          log_warn " $name — $status (not healthy)"
        fi
      done <<<"$STACK_STATUS"

      if [[ "$TOTAL_COUNT" -eq 0 ]]; then
        log_fail "No services found in compose project"
      elif $ALL_RUNNING && [[ "$TOTAL_COUNT" -eq "$RUNNING_COUNT" ]]; then
        log_pass "All ${TOTAL_COUNT} service(s) running (${RUNNING_COUNT}/${TOTAL_COUNT})"
      else
        log_fail "${RUNNING_COUNT}/${TOTAL_COUNT} service(s) running — some services are down"
      fi
    fi
  else
    log_fail "Failed to query compose stack status"
  fi
fi
|
|
|
|
# ---- Phase 4: Service health checks ----
log_step "Phase 4: Service Endpoint Health Checks"

# Probe one URL through retry() and leave the HTTP status of the last attempt
# in the global HTTP_CODE ("000" when no status was obtained).
# Arguments: $1 - URL to request
# Globals:   HEALTH_TIMEOUT (read), HTTP_CODE (written)
# Returns:   retry()'s status: 0 if an attempt succeeded, non-zero otherwise.
check_endpoint() {
  local url=$1 probe
  HTTP_CODE="000"
  # retry() evals a single-string command in the current shell, so the
  # assignment below lands in the global HTTP_CODE. Capturing curl's -w
  # output inside $(...) keeps the status code off the terminal, where it
  # used to be printed in front of "OK"/"FAIL". %q shell-quotes the
  # interpolated values so a URL containing quotes, spaces or "&" cannot
  # break (or inject into) the evaluated command.
  printf -v probe 'HTTP_CODE=$(curl -sf -o /dev/null -w %q --max-time %q %q)' \
    '%{http_code}' "$HEALTH_TIMEOUT" "$url"
  if retry "$probe"; then
    return 0
  fi
  HTTP_CODE=${HTTP_CODE:-000}
  return 1
}

ENDPOINT_CHECKS=0
ENDPOINT_PASS=0

for svc in "${SERVICES[@]}"; do
  url="${HEALTH_URLS[$svc]:-}"
  if [[ -z "$url" ]]; then
    if $VERBOSE; then
      log_info "No health check URL for service '$svc' — skipping"
    fi
    continue
  fi

  ENDPOINT_CHECKS=$((ENDPOINT_CHECKS + 1))
  echo -ne " Checking ${svc} ... "

  # The reported code comes from the attempt that decided the outcome, so no
  # additional request is made just to fetch it.
  if check_endpoint "$url"; then
    ENDPOINT_PASS=$((ENDPOINT_PASS + 1))
    echo -e "${GREEN}OK${NC} (HTTP ${HTTP_CODE})"
  else
    echo -e "${RED}FAIL${NC} (HTTP ${HTTP_CODE})"
    log_fail "Health check failed for ${svc} @ ${url}"
  fi
done

if [[ $ENDPOINT_CHECKS -eq 0 ]]; then
  log_warn "No health check URLs configured — skipping endpoint phase"
elif [[ $ENDPOINT_PASS -eq $ENDPOINT_CHECKS ]]; then
  log_pass "All ${ENDPOINT_CHECKS} endpoint(s) healthy"
else
  log_fail "${ENDPOINT_PASS}/${ENDPOINT_CHECKS} endpoint(s) healthy"
fi
|
|
|
|
# ---- Phase 5: Docker system sanity ----
log_step "Phase 5: Docker System Sanity"

# Print the used-space percentage (digits only) of the filesystem holding $1.
# Arguments: $1 - path on the filesystem to inspect
# Outputs:   the percentage on stdout
# Returns:   0 on success; 1 when df fails or the value is not numeric.
# `df -P` keeps each filesystem on a single line, so the figure is always in
# column 5 of line 2 (plain `df -h` may wrap long device names).
disk_used_pct() {
  local pct
  pct=$(df -P "$1" 2>/dev/null | awk 'NR==2 {sub(/%$/, "", $5); print $5}') || return 1
  [[ "$pct" =~ ^[0-9]+$ ]] || return 1
  printf '%s\n' "$pct"
}

# Check disk space for Docker. If the daemon cannot be queried, fall back to
# the conventional root directory.
DOCKER_ROOT=$(docker info --format '{{.DockerRootDir}}' 2>/dev/null) || DOCKER_ROOT=""
DOCKER_ROOT=${DOCKER_ROOT:-/var/lib/docker}
log_info "Docker root: ${DOCKER_ROOT}"

if [[ -d "$DOCKER_ROOT" ]] && USED_PCT=$(disk_used_pct "$DOCKER_ROOT"); then
  if [[ "$USED_PCT" -ge 90 ]]; then
    log_warn "Docker storage is ${USED_PCT}% full — consider cleanup"
  else
    log_pass "Docker storage at ${USED_PCT}% — within limits"
  fi
fi

# Check for dangling images.
# `|| DANGLING=0`: with pipefail a failing `docker images` (daemon down,
# docker missing) makes the assignment fail, and `set -e` would then end the
# script here, before the summary is printed.
DANGLING=$(docker images -f "dangling=true" -q 2>/dev/null | wc -l) || DANGLING=0
if [[ "$DANGLING" -gt 10 ]]; then
  log_warn "${DANGLING} dangling images found — consider docker image prune"
fi
|
|
|
|
# ---- Summary ----
# Final report: totals, the message of every recorded failure, then the exit
# status (0 when nothing failed, 1 otherwise).
SUMMARY_RULE="${BOLD}============================================${NC}"
echo ""
echo -e "$SUMMARY_RULE"
echo -e "${BOLD} Test Summary${NC}"
echo -e "${BOLD} $(date -u '+%Y-%m-%dT%H:%M:%SZ')${NC}"
echo -e "$SUMMARY_RULE"
echo -e " ${GREEN}Passed:${NC} ${PASS_COUNT}"
echo -e " ${RED}Failed:${NC} ${FAIL_COUNT}"
echo -e " ${YELLOW}Warnings:${NC} ${WARN_COUNT}"

if ((${#FAILURES[@]} > 0)); then
  echo -e "\n${BOLD}Failed checks:${NC}"
  for failure in "${FAILURES[@]}"; do
    echo -e " ${RED}•${NC} $failure"
  done
fi

echo ""
if [[ $FAIL_COUNT -ne 0 ]]; then
  echo -e "${RED}${BOLD}✗ ${FAIL_COUNT} integration check(s) failed${NC}"
  exit 1
fi
echo -e "${GREEN}${BOLD}✓ All integration checks passed${NC}"
exit 0
|