feat(svrnty-vision): Phase 4b complete — full impl + e2e test suite

- palette.py + rembg.py: implement from stubs (Pillow median-cut + rembg u2net)
- vlm.py: rename Spark2→steev (Strix Halo / Ollama); bump max_tokens 1024→4096 (qwen3-vl:32b thinking mode consumes budget tokens — 4096 min for valid output)
- settings.py: rename spark2_vlm_*/spark1_flux_* → vlm_*/flux_*; real defaults (steev 100.88.167.87:11434 Ollama, gx10 100.90.100.10:8188 ComfyUI)
- tests/: conftest.py + test_palette.py + test_rembg.py + test_integration_e2e.py (28 unit + 10 integration; 38/38 passing — VLM raw/polished/ugc + FLUX render)
- CLAUDE.md: rewrite to accurate phase status + infra + layout
- requirements.txt + pyproject.toml: add Pillow, rembg, pytest-asyncio deps

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-25 06:44:21 -04:00
parent d567489475
commit f6e09dbff2
15 changed files with 684 additions and 101 deletions
@@ -0,0 +1,46 @@
+"""Shared fixtures for svrnty-vision test suite."""
+
+from __future__ import annotations
+
+import base64
+import io
+
+import pytest
+from fastapi.testclient import TestClient
+from PIL import Image
+
+from svrnty_vision.server import app
+
+
+@pytest.fixture(scope="session")
+def client() -> TestClient:
+    """Provide a single TestClient wrapping the FastAPI app, shared across the session."""
+    test_client = TestClient(app)
+    return test_client
+
+
+@pytest.fixture(scope="session")
+def red_png_b64() -> str:
+    """Base64 text of a 100×100 solid-red PNG — a minimal valid image for all endpoints."""
+    png_buffer = io.BytesIO()
+    Image.new("RGB", (100, 100), color=(220, 50, 50)).save(png_buffer, format="PNG")
+    encoded = base64.b64encode(png_buffer.getvalue())
+    return encoded.decode("ascii")
+
+
+@pytest.fixture(scope="session")
+def gradient_png_b64() -> str:
+    """200×200 RGB gradient PNG as base64 — more realistic for VLM + palette tests.
+
+    Pixel (x, y) has colour (x, y, 128): red ramps left→right, green ramps
+    top→bottom, blue is constant.
+    """
+    side = 200
+    # Raw RGB bytes in row-major order (y outer, x inner). Handing the whole buffer
+    # to Pillow in one call replaces 40,000 individual putpixel() calls while
+    # producing exactly the same pixels.
+    pixels = bytes(
+        channel for y in range(side) for x in range(side) for channel in (x, y, 128)
+    )
+    img = Image.frombytes("RGB", (side, side), pixels)
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return base64.b64encode(buf.getvalue()).decode("ascii")
+
+
+def pytest_configure(config: pytest.Config) -> None:
+    """Register the custom ``integration`` marker with pytest.
+
+    The description names the hosts by their current names (steev / gx10); the
+    earlier "Spark" wording predates the rename done elsewhere in this change.
+    """
+    config.addinivalue_line(
+        "markers",
+        "integration: live service tests — require Tailscale + running steev (Ollama) "
+        "and gx10 (ComfyUI) hosts. "
+        "Run with: pytest -m integration",
+    )
@@ -43,7 +43,7 @@ def test_render_requires_workflow_or_prompt() -> None:
    assert response.status_code == 400


-def test_render_returns_502_when_spark1_unreachable() -> None:
+def test_render_returns_502_when_gx10_unreachable() -> None:
    class _StubClient:
        def __init__(self, *a, **kw):
            pass
@@ -1,4 +1,4 @@
-"""Smoke tests for the FastAPI scaffold."""
+"""Liveness + basic gateway smoke tests."""

 from fastapi.testclient import TestClient

@@ -8,20 +8,32 @@ client = TestClient(app)


 def test_healthz_returns_200() -> None:
-    response = client.get("/healthz")
-    assert response.status_code == 200
-    body = response.json()
+    resp = client.get("/healthz")
+    assert resp.status_code == 200
+    body = resp.json()
    assert body["status"] == "ok"
    assert "version" in body


-def test_palette_extract_returns_501() -> None:
-    # Still a 4a stub — Phase 4c moved only VLM + FLUX, palette/rembg deferred.
-    response = client.post("/palette/extract")
-    assert response.status_code == 501
+def test_all_routes_registered() -> None:
+    """All four functional endpoints must appear in the app's route table."""
+    mounted = {route.path for route in app.routes}
+    for expected in ("/vlm/analyze", "/flux/render", "/palette/extract", "/rembg/cutout"):
+        assert expected in mounted


-def test_rembg_cutout_returns_501() -> None:
-    # Still a 4a stub — Phase 4c moved only VLM + FLUX, palette/rembg deferred.
-    response = client.post("/rembg/cutout")
-    assert response.status_code == 501
+def test_vlm_analyze_missing_body_returns_400() -> None:
+    """An empty JSON object posted to /vlm/analyze is answered with 400."""
+    response = client.post("/vlm/analyze", json={})
+    assert response.status_code == 400
+
+
+def test_palette_extract_missing_body_returns_400() -> None:
+    """An empty JSON object posted to /palette/extract is answered with 400."""
+    response = client.post("/palette/extract", json={})
+    assert response.status_code == 400
+
+
+def test_rembg_cutout_missing_body_returns_400() -> None:
+    """An empty JSON object posted to /rembg/cutout is answered with 400."""
+    response = client.post("/rembg/cutout", json={})
+    assert response.status_code == 400
@@ -0,0 +1,276 @@
+"""End-to-end integration tests — hit the live inference hosts (steev + gx10) via svrnty-vision.
+
+Run with:   pytest -m integration -v
+Skip by default in CI / offline environments.
+
+Hosts required:
+  VLM   — svrnty-steev (Strix Halo) · 100.88.167.87:11434 · qwen3-vl:32b on Ollama
+  FLUX  — gx10-f38f · 100.90.100.10:8188 · ComfyUI + flux2_dev_fp8mixed
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import os
+from decimal import Decimal
+from urllib.parse import urlsplit
+
+import httpx
+import pytest
+from PIL import Image
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+BASE_URL = os.environ.get("SVRNTY_VISION_URL", "http://localhost:8092")
+VLM_HOST = "100.88.167.87"
+FLUX_HOST = "100.90.100.10"
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_png_b64(color: tuple[int, int, int] = (220, 80, 60), size: int = 128) -> str:
+    """Encode a solid-colour ``size``×``size`` RGB PNG as an ASCII base64 string."""
+    png_buffer = io.BytesIO()
+    Image.new("RGB", (size, size), color=color).save(png_buffer, format="PNG")
+    encoded = base64.b64encode(png_buffer.getvalue())
+    return encoded.decode("ascii")
+
+
+def _host_reachable(host: str, port: int, timeout: float = 2.0) -> bool:
+    """Return True when a TCP connection to ``host:port`` opens within ``timeout`` seconds."""
+    import socket
+
+    try:
+        probe = socket.create_connection((host, port), timeout=timeout)
+    except OSError:
+        # Refused, unreachable, timed out, DNS failure — all count as "not reachable".
+        return False
+    probe.close()
+    return True
+
+
+# ---------------------------------------------------------------------------
+# Skip conditions
+# ---------------------------------------------------------------------------
+
+# Skip markers for tests that need the upstream inference hosts.
+# NOTE: _host_reachable() is called here at module import, so each probe runs
+# once when pytest imports this file (up to the 2 s default timeout per host),
+# not once per decorated test.
+vlm_available = pytest.mark.skipif(
+    not _host_reachable(VLM_HOST, 11434),
+    reason=f"VLM host {VLM_HOST}:11434 (svrnty-steev Ollama) not reachable",
+)
+
+flux_available = pytest.mark.skipif(
+    not _host_reachable(FLUX_HOST, 8188),
+    reason=f"FLUX host {FLUX_HOST}:8188 (gx10 ComfyUI) not reachable",
+)
+
+# Probe the same endpoint the tests actually call. BASE_URL can be overridden via
+# SVRNTY_VISION_URL, so a hard-coded 127.0.0.1:8092 check could skip (or fail to
+# skip) based on the wrong host.
+_gateway_url = urlsplit(BASE_URL)
+_gateway_host = _gateway_url.hostname or "127.0.0.1"
+# Fall back to the scheme's default port when the URL does not spell one out.
+_gateway_port = _gateway_url.port or (443 if _gateway_url.scheme == "https" else 80)
+
+gateway_available = pytest.mark.skipif(
+    not _host_reachable(_gateway_host, _gateway_port),
+    reason=f"svrnty-vision gateway not reachable at {BASE_URL}",
+)
+
+# ---------------------------------------------------------------------------
+# Gateway health
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+def test_gateway_healthz() -> None:
+    """Liveness probe: /healthz answers 200 with status "ok" and a version field."""
+    response = httpx.get(f"{BASE_URL}/healthz", timeout=5)
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["status"] == "ok"
+    assert "version" in payload
+
+
+# ---------------------------------------------------------------------------
+# VLM — Qwen3-VL 32B on svrnty-steev
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+@vlm_available
+def test_vlm_analyze_raw_mode_returns_text() -> None:
+    """Raw mode: free-form description comes back as text and no score is parsed."""
+    request_body = {
+        "image_base64": _make_png_b64((220, 80, 60)),
+        "brand_context": "Describe what you see in this image.",
+        "rubric_mode": "raw",
+    }
+    resp = httpx.post(f"{BASE_URL}/vlm/analyze", json=request_body, timeout=60)
+    assert resp.status_code == 200, resp.text
+
+    result = resp.json()
+    assert result["rubric_mode"] == "raw"
+    assert result["brand_fit_score"] is None
+    raw_text = result["raw_scores_json"]
+    assert isinstance(raw_text, str)
+    assert len(raw_text) > 0
+    assert "qwen" in result["model_id"].lower()
+
+
+@pytest.mark.integration
+@gateway_available
+@vlm_available
+def test_vlm_analyze_polished_returns_scores() -> None:
+    """Polished mode: brand_fit and visual_polish scores both land in the 0–5 range."""
+    request_body = {
+        "image_base64": _make_png_b64((50, 120, 200)),
+        "brand_context": "Modern tech brand — clean, minimal, confident.",
+        "rubric_mode": "polished",
+    }
+    resp = httpx.post(f"{BASE_URL}/vlm/analyze", json=request_body, timeout=120)
+    assert resp.status_code == 200, resp.text
+    body = resp.json()
+    assert body["rubric_mode"] == "polished"
+
+    # Go through str() so the JSON number is compared as an exact decimal.
+    lower, upper = Decimal("0"), Decimal("5")
+    brand_fit = Decimal(str(body["brand_fit_score"]))
+    visual_polish = Decimal(str(body["visual_polish_score"]))
+    assert lower <= brand_fit <= upper, f"brand_fit out of range: {brand_fit}"
+    assert lower <= visual_polish <= upper, f"visual_polish out of range: {visual_polish}"
+
+    justification = body["justification"]
+    assert isinstance(justification, str)
+    assert len(justification) > 0
+
+
+@pytest.mark.integration
+@gateway_available
+@vlm_available
+def test_vlm_analyze_ugc_mode() -> None:
+    """UGC mode: response echoes the mode and carries a brand_fit score."""
+    request_body = {
+        "image_base64": _make_png_b64((80, 180, 80)),
+        "brand_context": "Fresh food delivery — organic, home-style.",
+        "rubric_mode": "ugc",
+    }
+    resp = httpx.post(f"{BASE_URL}/vlm/analyze", json=request_body, timeout=120)
+    assert resp.status_code == 200, resp.text
+
+    result = resp.json()
+    assert result["rubric_mode"] == "ugc"
+    assert result["brand_fit_score"] is not None
+
+
+# ---------------------------------------------------------------------------
+# FLUX — ComfyUI on gx10-f38f
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+@flux_available
+def test_flux_render_returns_valid_png() -> None:
+    """Small FLUX job (4 steps for speed) — checks response metadata and decodes the image."""
+    render_request = {
+        "prompt": "a plain white circle on black background",
+        "width": 512,
+        "height": 512,
+        "steps": 4,
+        "guidance": 2.5,
+    }
+    resp = httpx.post(f"{BASE_URL}/flux/render", json=render_request, timeout=300)
+    assert resp.status_code == 200, resp.text
+    body = resp.json()
+
+    assert "image_base64" in body
+    assert body["content_type"] == "image/png"
+    assert body["provider"] == "local"
+    elapsed_ms = body["duration_ms"]
+    assert isinstance(elapsed_ms, int)
+    assert elapsed_ms > 0
+
+    # Round-trip: the payload must decode to an image of the requested size.
+    decoded = Image.open(io.BytesIO(base64.b64decode(body["image_base64"])))
+    assert decoded.width == 512
+    assert decoded.height == 512
+
+
+@pytest.mark.integration
+@gateway_available
+@flux_available
+def test_flux_render_seeds_produce_different_images() -> None:
+    """Two renders with different prompts → different images (non-trivial output).
+
+    Despite the "seeds" in the name, no seed is sent: only the prompt differs
+    between the two requests.
+    """
+    def render(prompt: str) -> bytes:
+        """Render ``prompt`` at 512×512 / 4 steps and return the decoded image bytes."""
+        resp = httpx.post(
+            f"{BASE_URL}/flux/render",
+            json={"prompt": prompt, "width": 512, "height": 512, "steps": 4},
+            timeout=300,
+        )
+        # Include the response body on failure, as the other live tests do.
+        assert resp.status_code == 200, resp.text
+        return base64.b64decode(resp.json()["image_base64"])
+
+    img_a = render("solid red background, nothing else")
+    img_b = render("solid blue background, nothing else")
+    assert img_a != img_b, "Two different prompts produced identical output — likely cached/deduped"
+
+
+# ---------------------------------------------------------------------------
+# Palette — in-process (Pillow)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+def test_palette_extract_live() -> None:
+    """Palette extraction through the running gateway — needs no upstream host."""
+    request_body = {"image_base64": _make_png_b64((200, 50, 50)), "color_count": 4}
+    resp = httpx.post(f"{BASE_URL}/palette/extract", json=request_body, timeout=10)
+    assert resp.status_code == 200
+
+    result = resp.json()
+    red, _green, _blue = result["dominant"]
+    assert red > 150, "dominant color should be red-dominant"
+    assert result["color_count"] <= 4
+
+
+# ---------------------------------------------------------------------------
+# Rembg — in-process
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+def test_rembg_cutout_live() -> None:
+    """Background removal through the running gateway; the result must decode as RGBA."""
+    resp = httpx.post(
+        f"{BASE_URL}/rembg/cutout",
+        json={"image_base64": _make_png_b64()},
+        # Generous timeout: the first call downloads the u2net ONNX model.
+        timeout=120,
+    )
+    assert resp.status_code == 200
+
+    cutout_bytes = base64.b64decode(resp.json()["image_base64"])
+    cutout = Image.open(io.BytesIO(cutout_bytes))
+    assert cutout.mode == "RGBA"
+
+
+# ---------------------------------------------------------------------------
+# Error surface — gateway must return correct HTTP codes
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+def test_vlm_analyze_missing_image_returns_400() -> None:
+    """A /vlm/analyze request without any image field is answered with 400."""
+    request_body = {"brand_context": "test", "rubric_mode": "raw"}
+    response = httpx.post(f"{BASE_URL}/vlm/analyze", json=request_body, timeout=10)
+    assert response.status_code == 400
+
+
+@pytest.mark.integration
+@gateway_available
+def test_flux_render_missing_prompt_returns_400() -> None:
+    """A /flux/render request carrying only dimensions (no prompt) is answered with 400."""
+    request_body = {"width": 512, "height": 512}
+    response = httpx.post(f"{BASE_URL}/flux/render", json=request_body, timeout=10)
+    assert response.status_code == 400
@@ -0,0 +1,79 @@
+"""Unit tests for POST /palette/extract."""
+
+from __future__ import annotations
+
+import base64
+import io
+
+import pytest
+from PIL import Image
+
+from svrnty_vision.routers.palette import PaletteRequest, PaletteResponse, extract
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_png_b64(color: tuple[int, int, int], size: int = 50) -> str:
+    """Encode a solid-colour ``size``×``size`` RGB PNG as an ASCII base64 string."""
+    png_buffer = io.BytesIO()
+    Image.new("RGB", (size, size), color=color).save(png_buffer, format="PNG")
+    encoded = base64.b64encode(png_buffer.getvalue())
+    return encoded.decode("ascii")
+
+
+# ---------------------------------------------------------------------------
+# Unit tests (pure function / TestClient — no network)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_solid_red_dominant_is_red() -> None:
+    """A pure-red image yields a dominant colour with high R and low G/B."""
+    request = PaletteRequest(image_base64=_make_png_b64((255, 0, 0)), color_count=3)
+    result: PaletteResponse = await extract(request)
+    red, green, blue = result.dominant
+    assert red > 200, "dominant R channel should be high for solid red"
+    assert green < 80
+    assert blue < 80
+
+
+@pytest.mark.asyncio
+async def test_palette_color_count_respected() -> None:
+    """The reported colour count never exceeds the request and matches the palette length."""
+    request = PaletteRequest(image_base64=_make_png_b64((0, 128, 255)), color_count=4)
+    result = await extract(request)
+    assert result.color_count <= 4
+    assert len(result.palette) == result.color_count
+
+
+@pytest.mark.asyncio
+async def test_palette_each_entry_is_rgb_triple() -> None:
+    """Every palette entry has three channels, each within 0–255."""
+    request = PaletteRequest(image_base64=_make_png_b64((100, 200, 50)), color_count=6)
+    result = await extract(request)
+    for swatch in result.palette:
+        assert len(swatch) == 3
+        assert all(0 <= channel <= 255 for channel in swatch)
+
+
+@pytest.mark.asyncio
+async def test_palette_missing_image_raises_400() -> None:
+    """Calling extract() with a default-constructed request raises HTTPException(400)."""
+    from fastapi import HTTPException
+
+    empty_request = PaletteRequest()
+    with pytest.raises(HTTPException) as raised:
+        await extract(empty_request)
+    assert raised.value.status_code == 400
+
+
+@pytest.mark.asyncio
+async def test_palette_bad_base64_raises_400() -> None:
+    """A payload that is not valid base64 raises HTTPException(400)."""
+    from fastapi import HTTPException
+
+    garbage_request = PaletteRequest(image_base64="!!!notbase64!!!")
+    with pytest.raises(HTTPException) as raised:
+        await extract(garbage_request)
+    assert raised.value.status_code == 400
+
+
+def test_palette_via_test_client(client, red_png_b64) -> None:
+    """Same extraction exercised over HTTP through the shared TestClient fixture."""
+    request_body = {"image_base64": red_png_b64, "color_count": 5}
+    response = client.post("/palette/extract", json=request_body)
+    assert response.status_code == 200
+
+    result = response.json()
+    assert "dominant" in result
+    assert len(result["dominant"]) == 3
+    assert result["color_count"] <= 5
+    assert len(result["palette"]) == result["color_count"]
@@ -0,0 +1,45 @@
+"""Unit tests for POST /rembg/cutout."""
+
+from __future__ import annotations
+
+import base64
+import io
+
+import pytest
+from PIL import Image
+
+
+def _make_png_b64(color: tuple[int, int, int] = (180, 100, 50), size: int = 64) -> str:
+    """Encode a solid-colour ``size``×``size`` RGB PNG as an ASCII base64 string."""
+    png_buffer = io.BytesIO()
+    Image.new("RGB", (size, size), color=color).save(png_buffer, format="PNG")
+    encoded = base64.b64encode(png_buffer.getvalue())
+    return encoded.decode("ascii")
+
+
+def test_rembg_missing_image_returns_400(client) -> None:
+    """An empty JSON object posted to /rembg/cutout is answered with 400."""
+    response = client.post("/rembg/cutout", json={})
+    assert response.status_code == 400
+
+
+def test_rembg_bad_base64_returns_400(client) -> None:
+    """A payload that is not valid base64 is answered with 400."""
+    request_body = {"image_base64": "%%%bad%%%"}
+    response = client.post("/rembg/cutout", json=request_body)
+    assert response.status_code == 400
+
+
+def test_rembg_returns_png_with_alpha(client, red_png_b64) -> None:
+    """The cutout response is a PNG that decodes to an image with an alpha channel."""
+    response = client.post("/rembg/cutout", json={"image_base64": red_png_b64})
+    assert response.status_code == 200
+    result = response.json()
+    assert result["content_type"] == "image/png"
+    assert "image_base64" in result
+
+    cutout = Image.open(io.BytesIO(base64.b64decode(result["image_base64"])))
+    assert cutout.mode == "RGBA", f"expected RGBA, got {cutout.mode}"
+
+
+def test_rembg_alpha_matting_flag_round_trips(client, red_png_b64) -> None:
+    """The alpha_matting flag sent in the request is echoed back in the response."""
+    request_body = {"image_base64": red_png_b64, "alpha_matting": False}
+    response = client.post("/rembg/cutout", json=request_body)
+    assert response.status_code == 200
+    assert response.json()["alpha_matting"] is False
@@ -1,7 +1,7 @@
 """Pytest port of BTE's FakeVlmEvaluationParseTests + VlmRubric parse coverage.

 These tests cover the pure-function side of the VLM router (rubric prompt + score
-parsing). The HTTP call to Spark 2 is exercised separately via TestClient with a
+parsing). The HTTP call to steev (Ollama) is exercised separately via TestClient with a
 mocked httpx transport.
 """

@@ -64,11 +64,8 @@ def test_analyze_requires_image_input() -> None:
    assert response.status_code == 400


-def test_analyze_returns_502_when_spark2_unreachable() -> None:
-    """Smoke: with no Spark 2 (or a failing transport), gateway surfaces 502.
-
-    Uses a mock async client that raises ConnectError on POST.
-    """
+def test_analyze_returns_502_when_steev_unreachable() -> None:
+    """Smoke: with no steev (Ollama) or a failing transport, gateway surfaces 502."""

    class _StubClient:
        def __init__(self, *a, **kw):
@@ -95,8 +92,8 @@ def test_analyze_returns_502_when_spark2_unreachable() -> None:
    assert response.status_code == 502


-def test_analyze_round_trip_with_mocked_spark2() -> None:
-    """Happy path: mock vLLM returns a well-formed score JSON; gateway parses it."""
+def test_analyze_round_trip_with_mocked_steev() -> None:
+    """Happy path: mocked Ollama returns well-formed score JSON; gateway parses it."""

    canned_response = {
        "choices": [