#!/bin/bash
# ═══════════════════════════════════════════════════════════════════════════════
# AI Agent Production Stack - Comprehensive Test Suite
# ═══════════════════════════════════════════════════════════════════════════════
#
# Runs a series of checks against the locally running stack and prints a
# summary. Results are accumulated in the counters and TEST_RESULTS array
# declared below via print_test.

# Exit on error. Any command whose failure is an expected test outcome must
# therefore run inside a condition or be guarded with `|| ...`.
set -e

# Colors for output (interpreted by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Counters
TOTAL_TESTS=0
PASSED_TESTS=0
FAILED_TESTS=0

# Test results array: one "PASS: <name>" or "FAIL: <name> - <message>" entry
# per recorded test, in execution order.
declare -a TEST_RESULTS

#######################################
# Print a section header framed by two rules.
# Globals:   BLUE, NC (read)
# Arguments: $1 - section title
# Outputs:   the framed title on stdout
#######################################
print_header() {
  echo ""
  echo -e "${BLUE}═══════════════════════════════════════════════════════════${NC}"
  echo -e "${BLUE} $1${NC}"
  echo -e "${BLUE}═══════════════════════════════════════════════════════════${NC}"
  echo ""
}

#######################################
# Record and print the outcome of one test.
# Globals:   TOTAL_TESTS, PASSED_TESTS, FAILED_TESTS, TEST_RESULTS (written)
#            GREEN, RED, NC (read)
# Arguments: $1 - test name
#            $2 - "PASS" for success; any other value counts as a failure
#            $3 - failure message (optional, only printed on failure)
# Outputs:   one result line on stdout
#######################################
print_test() {
  local name="$1"
  local status="$2"
  local message="${3:-}"

  TOTAL_TESTS=$((TOTAL_TESTS + 1))

  if [ "$status" = "PASS" ]; then
    echo -e "${GREEN}✓${NC} $name"
    PASSED_TESTS=$((PASSED_TESTS + 1))
    TEST_RESULTS+=("PASS: $name")
  else
    echo -e "${RED}✗${NC} $name - $message"
    FAILED_TESTS=$((FAILED_TESTS + 1))
    TEST_RESULTS+=("FAIL: $name - $message")
  fi
}

#######################################
# GET a URL and compare the HTTP status code with the expected one.
# Globals:   HTTP_CODE (written) - the status code received, or "000" when
#            no HTTP response was obtained; callers read it for messages.
# Arguments: $1 - URL to request
#            $2 - expected status code (default: 200)
# Returns:   0 if the status matches, 1 otherwise
#######################################
check_http() {
  local url="$1"
  local expected_code="${2:-200}"

  # On a failed transfer curl's -w output is already "000", so the failure is
  # only swallowed here; echoing another "000" would yield "000000".
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "$url" 2>/dev/null) || true

  # Nothing printed at all (e.g. curl could not be executed).
  if [ -z "$HTTP_CODE" ]; then
    HTTP_CODE="000"
  fi

  [ "$HTTP_CODE" = "$expected_code" ]
}

# ═══════════════════════════════════════════════════════════════════════════════
# PRE-FLIGHT CHECKS
# ═══════════════════════════════════════════════════════════════════════════════

print_header "PRE-FLIGHT CHECKS"

# Check Docker services
echo "Checking Docker services..."
# Compose services whose `docker compose ps` output must contain "Up".
SERVICES=("api" "postgres" "ollama" "langfuse")
for svc in "${SERVICES[@]}"; do
  # Guard clause: report the failure and move on to the next service.
  if ! docker compose ps "$svc" 2>/dev/null | grep -q "Up"; then
    print_test "Docker service: $svc" "FAIL" "Service not running"
    continue
  fi
  print_test "Docker service: $svc" "PASS"
done

# Wait for services to be ready
echo ""
echo "Waiting for services to be ready..."
sleep 5

# ═══════════════════════════════════════════════════════════════════════════════
# PHASE 1: FUNCTIONAL TESTING
# ═══════════════════════════════════════════════════════════════════════════════

print_header "PHASE 1: FUNCTIONAL TESTING (Health Checks & Agent Queries)"

# Tests 1.1 - 1.5: each entry is "<test name>|<url>"; every URL is expected
# to answer with HTTP 200.
HEALTH_PROBES=(
  "API Health Endpoint|http://localhost:6001/health"                 # Test 1.1
  "API Readiness Endpoint|http://localhost:6001/health/ready"        # Test 1.2
  "Prometheus Metrics Endpoint|http://localhost:6001/metrics"        # Test 1.3
  "Langfuse Health Endpoint|http://localhost:3000/api/public/health" # Test 1.4
  "Ollama API Endpoint|http://localhost:11434/api/tags"              # Test 1.5
)
for probe in "${HEALTH_PROBES[@]}"; do
  probe_name="${probe%%|*}"
  probe_url="${probe#*|}"
  if check_http "$probe_url" 200; then
    print_test "$probe_name" "PASS"
  else
    # check_http leaves the status it received in HTTP_CODE.
    print_test "$probe_name" "FAIL" "HTTP $HTTP_CODE"
  fi
done

# Test 1.6: Math Operation (Simple)
echo ""
echo "Testing agent with math operation..."
#######################################
# POST one prompt to the agent endpoint and record whether the response
# reports success.
# Globals:   RESPONSE (written) - raw response body, empty if the request
#            could not be completed
# Arguments: $1 - test name passed to print_test
#            $2 - JSON request body
#######################################
run_agent_query_test() {
  local test_name="$1"
  local payload="$2"

  # `|| true`: with `set -e` a failed transfer would otherwise terminate the
  # whole suite here instead of being recorded as a failed test.
  RESPONSE=$(curl -s -X POST http://localhost:6001/api/command/executeAgent \
    -H "Content-Type: application/json" \
    -d "$payload" 2>/dev/null) || true

  # Accept optional whitespace around the colon so pretty-printed JSON matches.
  if printf '%s' "$RESPONSE" | grep -Eq '"success"[[:space:]]*:[[:space:]]*true'; then
    print_test "$test_name" "PASS"
  else
    print_test "$test_name" "FAIL" "Agent returned error or timeout"
  fi
}

# Test 1.6: Math Operation (Simple) - its progress line is printed just above.
run_agent_query_test "Agent Math Query (5 + 3)" '{"prompt":"What is 5 + 3?"}'

# Test 1.7: Math Operation (Complex)
echo "Testing agent with complex math..."
run_agent_query_test "Agent Complex Math Query" \
  '{"prompt":"Calculate (5 + 3) multiplied by 2"}'

# Test 1.8: Database Query
echo "Testing agent with database query..."
run_agent_query_test "Agent Database Query (Revenue)" \
  '{"prompt":"What was our revenue in January 2025?"}'

# Test 1.9: Customer Query
echo "Testing agent with customer query..."
run_agent_query_test "Agent Customer Query" \
  '{"prompt":"How many Enterprise customers do we have?"}'

# ═══════════════════════════════════════════════════════════════════════════════
# PHASE 2: RATE LIMITING TESTING
# ═══════════════════════════════════════════════════════════════════════════════

print_header "PHASE 2: RATE LIMITING TESTING"

echo "Testing rate limit (100 req/min)..."
echo "Sending 110 requests in parallel..."
SUCCESS=0
RATE_LIMITED=0

# Background jobs cannot assign variables in this shell, so every request
# writes its status code to its own file and the codes are tallied after
# `wait`. One file per request also avoids interleaved writes.
RATE_LIMIT_DIR=$(mktemp -d) || {
  echo "Could not create a temporary directory" >&2
  exit 1
}

for i in {1..110}; do
  curl -s -o /dev/null -w "%{http_code}" -X POST http://localhost:6001/api/command/executeAgent \
    -H "Content-Type: application/json" \
    -d "{\"prompt\":\"test $i\"}" > "$RATE_LIMIT_DIR/$i" 2>/dev/null &
done
wait

for code_file in "$RATE_LIMIT_DIR"/*; do
  [ -e "$code_file" ] || continue # glob matched nothing
  HTTP_CODE=$(cat "$code_file")
  if [ "$HTTP_CODE" = "200" ]; then
    SUCCESS=$((SUCCESS + 1))
  elif [ "$HTTP_CODE" = "429" ]; then
    RATE_LIMITED=$((RATE_LIMITED + 1))
  fi
done
rm -rf -- "$RATE_LIMIT_DIR"

echo ""
echo "Results: $SUCCESS successful, $RATE_LIMITED rate-limited"

if [ "$RATE_LIMITED" -gt 0 ]; then
  print_test "Rate Limiting Enforcement" "PASS"
else
  print_test "Rate Limiting Enforcement" "FAIL" "No requests were rate-limited (expected some 429s)"
fi

# Test rate limit headers.
# -D - dumps the response headers of the real POST to stdout. -I selects HEAD,
# which conflicts with the POST body given via -d, so it is not used here.
# `|| true` keeps a failed transfer from ending the suite under `set -e`.
RESPONSE_HEADERS=$(curl -s -D - -o /dev/null -X POST http://localhost:6001/api/command/executeAgent \
  -H "Content-Type: application/json" \
  -d '{"prompt":"test"}' 2>/dev/null) || true

if printf '%s\n' "$RESPONSE_HEADERS" | grep -qi "RateLimit"; then
  print_test "Rate Limit Headers Present" "PASS"
else
  print_test "Rate Limit Headers Present" "FAIL" "No rate limit headers found"
fi

# ═══════════════════════════════════════════════════════════════════════════════
# PHASE 3: OBSERVABILITY TESTING
# ═══════════════════════════════════════════════════════════════════════════════

print_header "PHASE 3: OBSERVABILITY TESTING"

# Generate test traces
echo "Generating diverse traces for Langfuse..."
# Request bodies used to produce a few different traces.
TRACE_PAYLOADS=(
  '{"prompt":"Hello"}'                       # Simple query
  '{"prompt":"What is 42 * 17?"}'            # Function call
  '{"prompt":"Show revenue for March 2025"}' # Database query
)

# Count requests that could not be sent instead of letting `set -e` end the
# suite on the first failed transfer.
TRACE_SEND_FAILURES=0
for payload in "${TRACE_PAYLOADS[@]}"; do
  if ! curl -s -X POST http://localhost:6001/api/command/executeAgent \
    -H "Content-Type: application/json" \
    -d "$payload" > /dev/null 2>&1; then
    TRACE_SEND_FAILURES=$((TRACE_SEND_FAILURES + 1))
  fi
done

sleep 2 # Allow traces to be exported

if [ "$TRACE_SEND_FAILURES" -eq 0 ]; then
  print_test "Trace Generation" "PASS"
else
  print_test "Trace Generation" "FAIL" \
    "$TRACE_SEND_FAILURES of ${#TRACE_PAYLOADS[@]} trace requests could not be sent"
fi
# -e is required for the colour escape sequences to be interpreted.
echo -e " ${YELLOW}→${NC} Check traces at: http://localhost:3000/traces"

# Test Prometheus metrics. An empty METRICS simply fails the checks below.
METRICS=$(curl -s http://localhost:6001/metrics 2>/dev/null) || true

if grep -q "http_server_request_duration_seconds" <<< "$METRICS"; then
  print_test "Prometheus HTTP Metrics" "PASS"
else
  print_test "Prometheus HTTP Metrics" "FAIL" "Metrics not found"
fi

if grep -q "http_client_request_duration_seconds" <<< "$METRICS"; then
  print_test "Prometheus HTTP Client Metrics" "PASS"
else
  print_test "Prometheus HTTP Client Metrics" "FAIL" "Metrics not found"
fi

# Check if metrics show actual requests: last field of the first *_count line.
REQUEST_COUNT=$(grep "http_server_request_duration_seconds_count" <<< "$METRICS" \
  | head -1 \
  | awk '{print $NF}')

# Compare in awk so a value that is not a plain integer (e.g. "12.0") is
# handled numerically instead of making `[ -gt ]` error out.
if [ -n "$REQUEST_COUNT" ] \
  && awk -v n="$REQUEST_COUNT" 'BEGIN { exit !(n + 0 > 0) }'; then
  print_test "Metrics Recording Requests" "PASS"
  echo -e " ${YELLOW}→${NC} Total requests recorded: $REQUEST_COUNT"
else
  print_test "Metrics Recording Requests" "FAIL" "No requests recorded in metrics"
fi

# ═══════════════════════════════════════════════════════════════════════════════
# PHASE 4: LOAD TESTING
# ═══════════════════════════════════════════════════════════════════════════════

print_header "PHASE 4: LOAD TESTING"

echo "Running concurrent request test (20 requests)..."
START_TIME=$(date +%s)
CONCURRENT_SUCCESS=0
CONCURRENT_FAIL=0

# A fresh, unpredictable results file: a fixed /tmp name could still hold
# lines from an earlier interrupted run and skew the counts.
LOAD_RESULTS_FILE=$(mktemp) || {
  echo "Could not create a temporary file" >&2
  exit 1
}

for i in {1..20}; do
  (
    # `|| true`: under `set -e` a failed transfer would end this subshell
    # before it could record "fail".
    RESPONSE=$(curl -s -X POST http://localhost:6001/api/command/executeAgent \
      -H "Content-Type: application/json" \
      -d "{\"prompt\":\"Calculate $i + $i\"}" 2>/dev/null) || true

    if printf '%s' "$RESPONSE" | grep -Eq '"success"[[:space:]]*:[[:space:]]*true'; then
      echo "success" >> "$LOAD_RESULTS_FILE"
    else
      echo "fail" >> "$LOAD_RESULTS_FILE"
    fi
  ) &
done
wait

END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))

# grep -c prints "0" itself and exits 1 when nothing matches, so only the
# exit status is neutralised; echoing another "0" would give a two-line value.
CONCURRENT_SUCCESS=$(grep -cx "success" "$LOAD_RESULTS_FILE" 2>/dev/null || true)
CONCURRENT_FAIL=$(grep -cx "fail" "$LOAD_RESULTS_FILE" 2>/dev/null || true)
rm -f -- "$LOAD_RESULTS_FILE"

# Fall back to 0 if grep produced no output at all.
CONCURRENT_SUCCESS=${CONCURRENT_SUCCESS:-0}
CONCURRENT_FAIL=${CONCURRENT_FAIL:-0}

echo ""
echo "Results: $CONCURRENT_SUCCESS successful, $CONCURRENT_FAIL failed (${DURATION}s)"

# At least 15 of the 20 requests must succeed.
if [ "$CONCURRENT_SUCCESS" -ge 15 ]; then
  print_test "Concurrent Load Handling (20 requests)" "PASS"
else
  print_test "Concurrent Load Handling (20 requests)" "FAIL" "Only $CONCURRENT_SUCCESS succeeded"
fi

# Sustained load test (30 seconds)
echo ""
echo "Running sustained load test (30 seconds, 2 req/sec)..."
START_TIME=$(date +%s)
END_TIME=$((START_TIME + 30))
SUSTAINED_SUCCESS=0
SUSTAINED_FAIL=0

while [ "$(date +%s)" -lt "$END_TIME" ]; do
  # `|| true`: a failed transfer counts as a failed request below instead of
  # ending the suite under `set -e`.
  RESPONSE=$(curl -s -X POST http://localhost:6001/api/command/executeAgent \
    -H "Content-Type: application/json" \
    -d '{"prompt":"What is 2 + 2?"}' 2>/dev/null) || true

  if printf '%s' "$RESPONSE" | grep -Eq '"success"[[:space:]]*:[[:space:]]*true'; then
    SUSTAINED_SUCCESS=$((SUSTAINED_SUCCESS + 1))
  else
    SUSTAINED_FAIL=$((SUSTAINED_FAIL + 1))
  fi

  sleep 0.5
done

TOTAL_SUSTAINED=$((SUSTAINED_SUCCESS + SUSTAINED_FAIL))

# Values are handed to awk with -v rather than spliced into the program text;
# the guard avoids a division by zero if no request was made.
if [ "$TOTAL_SUSTAINED" -gt 0 ]; then
  SUCCESS_RATE=$(awk -v ok="$SUSTAINED_SUCCESS" -v total="$TOTAL_SUSTAINED" \
    'BEGIN { printf "%.1f", (ok / total) * 100 }')
else
  SUCCESS_RATE="0.0"
fi

echo ""
echo "Results: $SUSTAINED_SUCCESS/$TOTAL_SUSTAINED successful (${SUCCESS_RATE}%)"

# Numeric comparison in awk. Inside `[ ]` a bare `>` is an output redirection:
# it creates a file named "90" and the test is true for any non-empty string.
if awk -v rate="$SUCCESS_RATE" 'BEGIN { exit !(rate + 0 > 90) }'; then
  print_test "Sustained Load Handling (30s)" "PASS"
else
  print_test "Sustained Load Handling (30s)" "FAIL" "Success rate: ${SUCCESS_RATE}%"
fi

# ═══════════════════════════════════════════════════════════════════════════════
# PHASE 5: DATABASE PERSISTENCE TESTING
# ═══════════════════════════════════════════════════════════════════════════════

print_header "PHASE 5: DATABASE PERSISTENCE TESTING"

# Test conversation persistence
echo "Testing conversation persistence..."
# `|| true`: a failed transfer leaves RESPONSE empty and is reported below
# instead of ending the suite under `set -e`.
RESPONSE=$(curl -s -X POST http://localhost:6001/api/command/executeAgent \
  -H "Content-Type: application/json" \
  -d '{"prompt":"Remember that my favorite number is 42"}' 2>/dev/null) || true

# Extract the first "conversationId":"<value>" pair (whitespace around the
# colon is tolerated). Splitting on double quotes makes the value field 4.
CONV_ID=$(printf '%s' "$RESPONSE" \
  | grep -Eo '"conversationId"[[:space:]]*:[[:space:]]*"[^"]*"' \
  | head -1 \
  | cut -d'"' -f4)

# Require a non-empty ID; the key being present with a null or empty value
# is not a created conversation.
if [ -n "$CONV_ID" ]; then
  print_test "Conversation Creation" "PASS"
  echo -e " ${YELLOW}→${NC} Conversation ID: $CONV_ID"

  # Verify in database. The ID is passed as a psql variable and expanded with
  # :'conv_id' (quoted as an SQL literal) rather than pasted into the query.
  # psql does not interpolate variables in -c strings, so the query is fed on
  # stdin, which needs `docker exec -i`.
  DB_CHECK=$(docker exec -i postgres psql -U postgres -d svrnty -t \
    -v conv_id="$CONV_ID" 2>/dev/null \
    <<< "SELECT COUNT(*) FROM agent.conversations WHERE id = :'conv_id';" \
    | tr -d '[:space:]')

  if [ "$DB_CHECK" = "1" ]; then
    print_test "Conversation DB Persistence" "PASS"
  else
    print_test "Conversation DB Persistence" "FAIL" "Not found in database"
  fi
else
  print_test "Conversation Creation" "FAIL" "No conversation ID returned"
fi

# Verify seed data
echo ""
echo "Verifying seed data..."

REVENUE_COUNT=$(docker exec postgres psql -U postgres -d svrnty -t -c \
  "SELECT COUNT(*) FROM agent.revenues;" 2>/dev/null | tr -d '[:space:]')

# The digits-only check keeps an empty or non-numeric result (query failed)
# from reaching the integer comparison.
if [[ "$REVENUE_COUNT" =~ ^[0-9]+$ ]] && [ "$REVENUE_COUNT" -gt 0 ]; then
  print_test "Revenue Seed Data" "PASS"
  echo -e " ${YELLOW}→${NC} Revenue records: $REVENUE_COUNT"
else
  print_test "Revenue Seed Data" "FAIL" "No revenue data found"
fi

CUSTOMER_COUNT=$(docker exec postgres psql -U postgres -d svrnty -t -c \
  "SELECT COUNT(*) FROM agent.customers;" 2>/dev/null | tr -d '[:space:]')

if [[ "$CUSTOMER_COUNT" =~ ^[0-9]+$ ]] && [ "$CUSTOMER_COUNT" -gt 0 ]; then
  print_test "Customer Seed Data" "PASS"
  echo -e " ${YELLOW}→${NC} Customer records: $CUSTOMER_COUNT"
else
  print_test "Customer Seed Data" "FAIL" "No customer data found"
fi

# ═══════════════════════════════════════════════════════════════════════════════
# PHASE 6: ERROR HANDLING & RECOVERY TESTING
# ═══════════════════════════════════════════════════════════════════════════════

print_header "PHASE 6: ERROR HANDLING & RECOVERY TESTING"

# Test graceful error handling
echo "Testing invalid request handling..."
# Only the status code matters here, so the request is sent once.
# `|| true` keeps a failed transfer from ending the suite under `set -e`.
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://localhost:6001/api/command/executeAgent \
  -H "Content-Type: application/json" \
  -d '{"invalid":"json structure"}' 2>/dev/null) || true
if [ -z "$HTTP_CODE" ]; then
  HTTP_CODE="000"
fi

if [ "$HTTP_CODE" = "400" ] || [ "$HTTP_CODE" = "422" ]; then
  print_test "Invalid Request Handling" "PASS"
else
  print_test "Invalid Request Handling" "FAIL" "Expected 400/422, got $HTTP_CODE"
fi

# Test service restart capability
echo ""
echo "Testing service restart (API)..."

if docker compose restart api > /dev/null 2>&1; then
  # Poll the health endpoint for up to ~30 seconds instead of relying on one
  # check after a fixed pause.
  RESTART_RECOVERED=0
  for _ in {1..30}; do
    if check_http "http://localhost:6001/health" 200; then
      RESTART_RECOVERED=1
      break
    fi
    sleep 1
  done

  if [ "$RESTART_RECOVERED" -eq 1 ]; then
    print_test "Service Restart Recovery" "PASS"
  else
    print_test "Service Restart Recovery" "FAIL" "Service did not recover"
  fi
else
  # Reported as a failed test; under `set -e` an unguarded failure here would
  # end the script before the summary is printed.
  print_test "Service Restart Recovery" "FAIL" "Restart command failed"
fi

# ═══════════════════════════════════════════════════════════════════════════════
# FINAL REPORT
# ═══════════════════════════════════════════════════════════════════════════════

print_header "TEST SUMMARY"

echo "Total Tests: $TOTAL_TESTS"
echo -e "${GREEN}Passed: $PASSED_TESTS${NC}"
echo -e "${RED}Failed: $FAILED_TESTS${NC}"
echo ""

# Counters are passed with -v; the guard avoids dividing by zero.
if [ "$TOTAL_TESTS" -gt 0 ]; then
  SUCCESS_PERCENTAGE=$(awk -v passed="$PASSED_TESTS" -v total="$TOTAL_TESTS" \
    'BEGIN { printf "%.1f", (passed / total) * 100 }')
else
  SUCCESS_PERCENTAGE="0.0"
fi
echo "Success Rate: ${SUCCESS_PERCENTAGE}%"
echo ""

print_header "ACCESS POINTS"

echo "API Endpoints:"
echo " • HTTP API: http://localhost:6001/api/command/executeAgent"
echo " • gRPC API: http://localhost:6000"
echo " • Swagger UI: http://localhost:6001/swagger"
echo " • Health: http://localhost:6001/health"
echo " • Metrics: http://localhost:6001/metrics"
echo ""
echo "Monitoring:"
echo " • Langfuse UI: http://localhost:3000"
echo " • Ollama API: http://localhost:11434"
echo ""

print_header "PRODUCTION READINESS CHECKLIST"

#######################################
# Tell whether every named test was recorded as passed.
# Globals:   TEST_RESULTS (read)
# Arguments: test names, exactly as they were given to print_test
# Returns:   0 if each name has a "PASS: <name>" entry, 1 otherwise
#######################################
all_tests_passed() {
  local wanted recorded found
  for wanted in "$@"; do
    found=0
    for recorded in "${TEST_RESULTS[@]}"; do
      if [ "$recorded" = "PASS: $wanted" ]; then
        found=1
        break
      fi
    done
    if [ "$found" -eq 0 ]; then
      return 1
    fi
  done
  return 0
}

#######################################
# Print one checklist line: a check mark when all backing tests passed,
# a warning otherwise.
# Arguments: $1 - checklist label; remaining - names of the backing tests
#######################################
checklist_item() {
  local label="$1"
  shift
  if all_tests_passed "$@"; then
    echo -e " ${GREEN}✓${NC} $label"
  else
    echo -e " ${YELLOW}⚠${NC} $label (related test did not pass)"
  fi
}

echo "Infrastructure:"
# Infrastructure counts as healthy when at least 70% of all tests passed.
if [ "$PASSED_TESTS" -ge $((TOTAL_TESTS * 70 / 100)) ]; then
  echo -e " ${GREEN}✓${NC} Docker containerization"
  echo -e " ${GREEN}✓${NC} Multi-service orchestration"
  echo -e " ${GREEN}✓${NC} Health checks configured"
else
  echo -e " ${YELLOW}⚠${NC} Some infrastructure tests failed"
fi

# The items below are tied to the tests recorded earlier in the run, so a
# check mark is only shown when the corresponding tests actually passed.
echo ""
echo "Observability:"
checklist_item "Prometheus metrics enabled" \
  "Prometheus Metrics Endpoint" "Prometheus HTTP Metrics"
checklist_item "Langfuse tracing configured" \
  "Langfuse Health Endpoint" "Trace Generation"
checklist_item "Health endpoints active" \
  "API Health Endpoint" "API Readiness Endpoint"

echo ""
echo "Reliability:"
checklist_item "Database persistence" "Conversation DB Persistence"
checklist_item "Rate limiting active" "Rate Limiting Enforcement"
checklist_item "Error handling tested" "Invalid Request Handling"

echo ""
echo "═══════════════════════════════════════════════════════════"
echo ""

# Exit with appropriate code
if [ "$FAILED_TESTS" -eq 0 ]; then
  echo -e "${GREEN}All tests passed! Stack is production-ready.${NC}"
  exit 0
else
  echo -e "${YELLOW}Some tests failed. Review the report above.${NC}"
  exit 1
fi