diff --git a/CLAUDE.md b/CLAUDE.md
index b329bd2..7985f46 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,75 +4,103 @@
 contract from `/home/svrnty/workspaces/hermes/CLAUDE.md`. Read both before
 touching anything here.*
 
-## What this repo is
+## What this is
 
-A FastAPI HTTP gateway in front of four vision capabilities (VLM analysis,
-FLUX image generation, palette extraction, background removal). It is a
-**sibling of `bte/`**, not a child. BTE calls it over HTTP.
+Standalone sovereign vision HTTP gateway. Four endpoints, two backends:
+
+| Endpoint | Impl | Backend |
+|---|---|---|
+| `POST /vlm/analyze` | HTTP proxy | Qwen3-VL 32B · Ollama · svrnty-steev (Strix Halo) · `100.88.167.87:11434` |
+| `POST /flux/render` | HTTP proxy + poll | FLUX.2-dev · ComfyUI · gx10-f38f · `100.90.100.10:8188` |
+| `POST /palette/extract` | In-process | Pillow median-cut quantization |
+| `POST /rembg/cutout` | In-process | rembg u2net ONNX |
+| `GET /healthz` | Liveness probe | Always 200 |
+
+**Sibling of `bte/`** — BTE calls it over HTTP via `SvrntyVisionGatewayClient`.
+**Usable by any agent** — no BTE coupling in this repo. Agents can call the
+endpoints directly (see `L4-svrnty.tool-vision` in cortex/ for the Go wrapper).
 
 ## Hard invariants
 
-- **Thin gateway only.** Qwen3-VL runs on Spark 2 (vLLM). FLUX runs on
-  Spark 1 (ComfyUI). svrnty-vision proxies — it does NOT load model
-  weights or pull torch/transformers/diffusers in-process. Two exceptions
-  permitted in Phase 4b: `palette` (Pillow + colorthief) and `rembg`
-  (rembg lib) — both CPU-light, no GPU.
-- **No cloud VLM providers.** The whole point of this extraction is to
-  delete Anthropic/OpenAI/Google/Higgsfield SDK dependencies from BTE.
-  Do not reintroduce them here. Sovereign-first.
-- **Secrets via env only.** No keys in code, logs, or argv. Use
-  `pydantic-settings` + `.env` (gitignored).
-- **Stay in Python ≥3.11.** Workspace standard.
+- **VLM + FLUX are thin proxies only.** No model weights loaded in-process.
+  Pillow + rembg are the only in-process ML.
+- **No cloud providers.** Sovereign-first. Anthropic/OpenAI/Google/Higgsfield
+  must never be re-introduced here.
+- **Config via env only.** pydantic-settings + `.env` (gitignored). No
+  hardcoded IPs in code — all in settings.py defaults or overridden by `.env`.
+- **Port 8092.** BTE is configured to call `http://localhost:8092`.
 
-## Phase status
+## Phase status (BTE Phase 4 sub-phases)
 
 | Phase | Scope | State |
 |---|---|---|
-| 4a | Scaffold: FastAPI shell, `/healthz`, four 501 stubs, tests | **done (this commit)** |
-| 4b | Port real implementations from BTE; HTTP clients for Spark 1/2 | not started |
-| 4c | Delete the corresponding .NET code from BTE | not started |
-| 4d | Wire BTE to call svrnty-vision over HTTP via thin adapter | not started |
+| 4a | FastAPI scaffold + /healthz + 4 route stubs | ✅ done |
+| 4b | Implement vlm.py, flux.py, palette.py, rembg.py | ✅ done (2026-05-25) |
+| 4c | Delete .NET vision providers from BTE | ✅ done (BTE Phase 4 commit 3112135) |
+| 4d | Wire BTE → svrnty-vision via SvrntyVisionGatewayClient | ✅ done (BTE Phase 4 commit 3112135) |
 
-See `/home/svrnty/workspaces/hermes/sot/01-ROADMAP/BTE-REFACTOR-EXECUTION-PLAN.md`
-and `/home/svrnty/workspaces/hermes/bte/docs/REFACTOR-AUDIT-2026-05-24.md` §3 V.
+## Infrastructure (Tailscale)
+
+```
+svrnty-steev  100.88.167.87   Strix Halo — Ollama — qwen3-vl:32b (VLM)
+gx10-f38f     100.90.100.10   NVIDIA GB10 128GB — ComfyUI v0.18.1 (FLUX)
+```
+
+**ComfyUI FLUX.2 model set (gx10):**
+- `diffusion_models/flux2_dev_fp8mixed.safetensors`
+- `text_encoders/mistral_3_small_flux2_fp8.safetensors`
+- `vae/flux2-vae.safetensors`
 
 ## Layout
 
 ```
 src/svrnty_vision/
-    server.py        # FastAPI app + /healthz + router includes
-    settings.py      # pydantic-settings (env-driven)
+    server.py         # FastAPI app + /healthz + router includes
+    settings.py       # pydantic-settings — all config here, no hardcodes
     routers/
-        vlm.py       # POST /vlm/analyze     (501 stub → Spark 2)
-        flux.py      # POST /flux/render     (501 stub → Spark 1)
-        palette.py   # POST /palette/extract (501 stub → in-process)
-        rembg.py     # POST /rembg/cutout    (501 stub → in-process)
+        vlm.py        # POST /vlm/analyze   → Ollama (Qwen3-VL 32B)
+        flux.py       # POST /flux/render   → ComfyUI (FLUX.2-dev)
+        palette.py    # POST /palette/extract  in-process (Pillow)
+        rembg.py      # POST /rembg/cutout     in-process (rembg)
 tests/
-    test_healthz.py  # TestClient smoke
+    conftest.py              # fixtures: TestClient, red_png_b64, gradient_png_b64
+    test_healthz.py          # liveness + route registration + 400 on empty body
+    test_vlm_parse.py        # pure-function: rubric prompt + score parsing
+    test_flux_workflow.py    # pure-function: stopgap FLUX.2 workflow builder
+    test_palette.py          # unit: palette extraction (no network)
+    test_rembg.py            # unit: background removal (no network)
+    test_integration_e2e.py  # live e2e: VLM + FLUX + palette + rembg
 ```
 
 ## Run / test
 
 ```sh
+# Install
 python -m venv .venv && source .venv/bin/activate
-pip install -r requirements.txt
-pip install -e .                                  # required: src/ layout
-uvicorn svrnty_vision.server:app --port 8090     # serve
-pytest tests/                                     # test
+pip install -r requirements.txt && pip install -e .
+
+# Serve (reads .env automatically)
+uvicorn svrnty_vision.server:app --host 0.0.0.0 --port 8092
+
+# Unit tests (no network)
+pytest tests/ -m "not integration"
+
+# Full e2e (requires Tailscale + live VLM/FLUX hosts: svrnty-steev, gx10-f38f)
+pytest tests/ -m integration -v
 ```
 
-## Git
+## Config (.env)
 
-- Default branch: `jp` (workspace convention).
-- Local-only until JP authorises the gitea push:
-  `git remote add openharbor git@git.openharbor.io:svrnty/svrnty-vision.git`
-  then `git push -u openharbor jp`.
+```
+SVRNTY_VISION_PORT=8092
+FLUX_URL=http://100.90.100.10:8188
+VLM_URL=http://100.88.167.87:11434
+VLM_MODEL=qwen3-vl:32b
+VISION_REQUEST_TIMEOUT_SECONDS=120
+```
 
 ## When extending
 
-- New endpoint? Add a router under `src/svrnty_vision/routers/`, register
-  it in `server.py`, add a test in `tests/`.
-- New Spark dependency? Add the URL to `settings.py` and `.env.example`,
-  never hardcode.
-- Surgical changes only. Don't refactor adjacent stubs while implementing
-  one — each phase has its own commit.
+- New endpoint → new router under `routers/`, register in `server.py`, tests in `tests/`.
+- New backend → add URL to `settings.py` + `.env.example`, never hardcode.
+- Surgical only. No cross-endpoint refactors while implementing one feature.
diff --git a/pyproject.toml b/pyproject.toml
index 051f123..a2dbd86 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "svrnty-vision"
 version = "0.1.0"
-description = "Sovereign vision HTTP gateway — VLM analysis, FLUX image gen, palette extraction, background removal. Calls Spark services over HTTP."
+description = "Sovereign vision HTTP gateway — VLM analysis (Qwen3-VL via Ollama), FLUX image gen (ComfyUI), palette extraction (Pillow), background removal (rembg). Standalone; usable by any agent."
 readme = "README.md"
 requires-python = ">=3.11"
 license = { text = "Proprietary" }
@@ -18,11 +18,15 @@ dependencies = [
     "pydantic>=2.9,<3.0",
     "pydantic-settings>=2.6,<3.0",
     "httpx>=0.27,<1.0",
+    "Pillow>=11,<13",
+    "colorthief>=0.2.1",
+    "rembg>=2.0,<3.0",
 ]
 
 [project.optional-dependencies]
 dev = [
     "pytest>=8.3,<9.0",
+    "pytest-asyncio>=1.0",
 ]
 
 [tool.setuptools.packages.find]
@@ -31,3 +35,4 @@ where = ["src"]
 [tool.pytest.ini_options]
 pythonpath = ["src"]
 testpaths = ["tests"]
+asyncio_mode = "auto"
diff --git a/requirements.txt b/requirements.txt
index e1b2466..d373bdc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,10 @@ uvicorn[standard]>=0.32,<1.0
 pydantic>=2.9,<3.0
 pydantic-settings>=2.6,<3.0
 httpx>=0.27,<1.0
+Pillow>=11,<13
+colorthief>=0.2.1
+rembg>=2.0,<3.0
 
-# Test deps (kept here for simplicity in Phase 4a)
+# Test deps
 pytest>=8.3,<9.0
+pytest-asyncio>=1.0
diff --git a/src/svrnty_vision/routers/flux.py b/src/svrnty_vision/routers/flux.py
index 0bf19aa..b258f75 100644
--- a/src/svrnty_vision/routers/flux.py
+++ b/src/svrnty_vision/routers/flux.py
@@ -217,7 +217,7 @@ async def render(req: RenderRequest) -> RenderResponse:
             req.prompt, req.width, req.height, req.guidance, req.steps, req.seed
         )
 
-    endpoint = settings.spark1_flux_url.rstrip("/")
+    endpoint = settings.flux_url.rstrip("/")
     started = time.monotonic()
 
     try:
diff --git a/src/svrnty_vision/routers/palette.py b/src/svrnty_vision/routers/palette.py
index 463a1a5..7cf237e 100644
--- a/src/svrnty_vision/routers/palette.py
+++ b/src/svrnty_vision/routers/palette.py
@@ -1,17 +1,69 @@
-"""Palette extraction (ColorThief-equivalent) — stub until Phase 4b."""
+"""Palette extraction — dominant colors via Pillow median-cut quantization."""
+
+from __future__ import annotations
+
+import base64
+import io
 
 from fastapi import APIRouter, HTTPException, status
+from PIL import Image
+from pydantic import BaseModel, Field
 
 router = APIRouter(prefix="/palette", tags=["palette"])
 
+_MAX_DIM = 200  # downsample before quantize for speed
 
-@router.post("/extract")
-async def extract() -> None:
-    """Extract a dominant-color palette from an image.
 
-    Phase 4a: stub. Phase 4b: runs in-process (Pillow + colorthief).
-    """
-    raise HTTPException(
-        status_code=status.HTTP_501_NOT_IMPLEMENTED,
-        detail="palette.extract not implemented in Phase 4a — see BTE-REFACTOR-EXECUTION-PLAN Phase 4b",
-    )
+class PaletteRequest(BaseModel):
+    """`image_base64` must be supplied; `image_url` is accepted but rejected with HTTP 400."""
+
+    image_base64: str | None = None
+    image_url: str | None = None
+    content_type: str = "image/png"
+    color_count: int = Field(default=6, ge=1, le=32)
+
+
+class PaletteResponse(BaseModel):
+    dominant: list[int] = Field(description="[R, G, B] — single most prominent color")
+    palette: list[list[int]] = Field(description="[[R,G,B], …] — all extracted colors")
+    color_count: int
+
+
+def _load_image(req: PaletteRequest) -> Image.Image:
+    """Decode ``req.image_base64`` into an RGB image; raise HTTP 400 on unusable input."""
+    if req.image_base64:
+        try:
+            raw = base64.b64decode(req.image_base64)
+        except Exception as e:
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Bad base64: {e}") from e
+        try:  # bytes that are not a readable image are a client error, not an unhandled 500
+            return Image.open(io.BytesIO(raw)).convert("RGB")
+        except Exception as e:
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Bad image: {e}") from e
+    hint = "image_url not supported for palette — provide image_base64." if req.image_url else "Provide image_base64."
+    raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=hint)
+
+
+@router.post("/extract", response_model=PaletteResponse)
+async def extract(req: PaletteRequest) -> PaletteResponse:
+    """Extract dominant colors via Pillow median-cut quantization.
+
+    Returns the quantized palette plus its most-used entry as ``dominant``.
+    """
+    img = _load_image(req)
+    # Downsample for speed before quantizing.
+    if max(img.width, img.height) > _MAX_DIM:
+        img.thumbnail((_MAX_DIM, _MAX_DIM), Image.Resampling.LANCZOS)
+
+    quantized = img.quantize(colors=req.color_count, method=Image.Quantize.MEDIANCUT)
+    raw_palette = quantized.getpalette() or []
+    n = min(req.color_count, len(raw_palette) // 3)
+    palette = [raw_palette[i * 3 : i * 3 + 3] for i in range(n)]
+
+    # getcolors() on a "P" image yields (count, palette_index) pairs and is
+    # available across the whole Pillow>=11,<13 range this project pins, unlike
+    # Image.get_flattened_data(), a newer addition that Pillow 11.x lacks.
+    usage = quantized.getcolors() or []
+    dominant_idx = max(usage, key=lambda pair: pair[0])[1] if usage else 0
+    dominant = palette[dominant_idx] if dominant_idx < len(palette) else palette[0]
+    return PaletteResponse(dominant=dominant, palette=palette, color_count=len(palette))
diff --git a/src/svrnty_vision/routers/rembg.py b/src/svrnty_vision/routers/rembg.py
index 4089fd7..ce23295 100644
--- a/src/svrnty_vision/routers/rembg.py
+++ b/src/svrnty_vision/routers/rembg.py
@@ -1,17 +1,52 @@
-"""Background removal — stub until Phase 4b."""
+"""Background removal — in-process via rembg (u2net ONNX, CPU-light)."""
+
+from __future__ import annotations
+
+import base64
+import io
 
 from fastapi import APIRouter, HTTPException, status
+from pydantic import BaseModel
 
 router = APIRouter(prefix="/rembg", tags=["rembg"])
 
 
-@router.post("/cutout")
-async def cutout() -> None:
-    """Remove the background of an image (alpha cutout).
+class CutoutRequest(BaseModel):
+    """Request body for POST /rembg/cutout; `image_base64` must be supplied."""
 
-    Phase 4a: stub. Phase 4b: runs in-process (rembg) or proxies to a Spark service.
-    """
-    raise HTTPException(
-        status_code=status.HTTP_501_NOT_IMPLEMENTED,
-        detail="rembg.cutout not implemented in Phase 4a — see BTE-REFACTOR-EXECUTION-PLAN Phase 4b",
+    image_base64: str | None = None
+    content_type: str = "image/png"
+    alpha_matting: bool = False  # slower but cleaner edges on hair/fur
+
+
+class CutoutResponse(BaseModel):
+    image_base64: str
+    content_type: str = "image/png"
+    alpha_matting: bool
+
+
+@router.post("/cutout", response_model=CutoutResponse)
+async def cutout(req: CutoutRequest) -> CutoutResponse:
+    """Remove background. Returns PNG with transparency (alpha channel).
+
+    Raises HTTP 400 for a missing/undecodable image, HTTP 502 if rembg fails.
+    """
+    import asyncio  # local import: used only to push inference off the event loop
+    if not req.image_base64:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Provide image_base64.")
+    try:
+        raw = base64.b64decode(req.image_base64)
+    except Exception as e:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Bad base64: {e}") from e
+    try:
+        from rembg import remove  # lazy: downloads u2net model on first call
+        # remove() is a blocking call; a worker thread keeps the event loop serving.
+        result_bytes = await asyncio.to_thread(remove, raw, alpha_matting=req.alpha_matting)
+    except Exception as e:
+        detail = f"rembg failed: {type(e).__name__}: {e}"
+        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) from e
+    return CutoutResponse(
+        image_base64=base64.b64encode(result_bytes).decode("ascii"),
+        content_type="image/png",
+        alpha_matting=req.alpha_matting,
     )
diff --git a/src/svrnty_vision/routers/vlm.py b/src/svrnty_vision/routers/vlm.py
index b7b8f39..7c61a48 100644
--- a/src/svrnty_vision/routers/vlm.py
+++ b/src/svrnty_vision/routers/vlm.py
@@ -1,4 +1,4 @@
-"""VLM (vision-language model) analysis — proxies to Spark 2 (Qwen3-VL via vLLM).
+"""VLM (vision-language model) analysis — proxies to steev (Qwen3-VL via Ollama).
 
 Ported from BTE's OpenAiVlmClient.cs + VlmRubric.cs (Phase 4b). Cloud Anthropic
 dialect intentionally dropped — svrnty-vision is sovereign-only.
@@ -34,8 +34,8 @@ class AnalyzeRequest(BaseModel):
     content_type: str = "image/png"
     brand_context: str = ""
     rubric_mode: str = "polished"
-    model: str | None = None  # override settings.spark2_vlm_model
-    max_tokens: int = 1024
+    model: str | None = None  # override settings.vlm_model
+    max_tokens: int = 4096  # qwen3-vl:32b thinking mode uses budget tokens; 4096 min for valid output
 
 
 class AnalyzeResponse(BaseModel):
@@ -148,10 +148,10 @@ async def _resolve_data_uri(req: AnalyzeRequest) -> str:
 
 @router.post("/analyze", response_model=AnalyzeResponse)
 async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
-    """Analyze an image with Qwen3-VL on Spark 2 (vLLM, OpenAI-compatible)."""
+    """Analyze an image with Qwen3-VL on steev (Ollama) (vLLM, OpenAI-compatible)."""
     data_uri = await _resolve_data_uri(req)
     rubric = build_rubric_prompt(req.brand_context, req.rubric_mode)
-    model = req.model or settings.spark2_vlm_model
+    model = req.model or settings.vlm_model
 
     body: dict[str, Any] = {
         "model": model,
@@ -168,7 +168,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
         ],
     }
 
-    url = settings.spark2_vlm_url.rstrip("/") + "/v1/chat/completions"
+    url = settings.vlm_url.rstrip("/") + "/v1/chat/completions"
     try:
         async with httpx.AsyncClient(
             timeout=settings.vision_request_timeout_seconds
@@ -179,7 +179,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
     except httpx.HTTPError as e:
         raise HTTPException(
             status_code=status.HTTP_502_BAD_GATEWAY,
-            detail=f"Spark 2 (vLLM) at {url} unreachable: {type(e).__name__}: {e}",
+            detail=f"steev (Ollama) (vLLM) at {url} unreachable: {type(e).__name__}: {e}",
         ) from e
 
     try:
@@ -187,7 +187,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
     except (KeyError, IndexError, TypeError) as e:
         raise HTTPException(
             status_code=status.HTTP_502_BAD_GATEWAY,
-            detail=f"Spark 2 response shape unexpected: {e}",
+            detail=f"steev (Ollama) response shape unexpected: {e}",
         ) from e
 
     return parse_scores(text, req.rubric_mode, model)
diff --git a/src/svrnty_vision/settings.py b/src/svrnty_vision/settings.py
index 6644cf1..6e0a04a 100644
--- a/src/svrnty_vision/settings.py
+++ b/src/svrnty_vision/settings.py
@@ -14,14 +14,18 @@ class Settings(BaseSettings):
 
     # Server
     svrnty_vision_host: str = "0.0.0.0"
-    svrnty_vision_port: int = 8090
+    svrnty_vision_port: int = 8092
 
-    # Spark 1 — FLUX (ComfyUI)
-    spark1_flux_url: str = "http://spark1.lan:8188"
+    # FLUX image generation — ComfyUI on gx10-f38f (100.90.100.10, NVIDIA GB10)
+    # Models required: diffusion_models/flux2_dev_fp8mixed.safetensors
+    #                  text_encoders/mistral_3_small_flux2_fp8.safetensors
+    #                  vae/flux2-vae.safetensors
+    flux_url: str = "http://100.90.100.10:8188"
 
-    # Spark 2 — Qwen3-VL (vLLM, OpenAI-compatible)
-    spark2_vlm_url: str = "http://spark2.lan:8000"
-    spark2_vlm_model: str = "Qwen/Qwen3-VL-7B-Instruct"
+    # VLM analysis — Qwen3-VL 32B via Ollama on svrnty-steev (Strix Halo, this machine)
+    # OpenAI-compatible endpoint; no /v1 suffix here — router appends it.
+    vlm_url: str = "http://100.88.167.87:11434"
+    vlm_model: str = "qwen3-vl:32b"
 
     # Common
     vision_request_timeout_seconds: int = 120
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..a50446a
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,46 @@
+"""Shared fixtures for svrnty-vision test suite."""
+
+from __future__ import annotations
+
+import base64
+import io
+
+import pytest
+from fastapi.testclient import TestClient
+from PIL import Image
+
+from svrnty_vision.server import app
+
+
+@pytest.fixture(scope="session")
+def client() -> TestClient:
+    return TestClient(app)
+
+
+@pytest.fixture(scope="session")
+def red_png_b64() -> str:
+    """100×100 solid-red PNG encoded as base64 — minimal valid image for all endpoints."""
+    img = Image.new("RGB", (100, 100), color=(220, 50, 50))
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return base64.b64encode(buf.getvalue()).decode("ascii")
+
+
+@pytest.fixture(scope="session")
+def gradient_png_b64() -> str:
+    """200×200 RGB gradient — more realistic for VLM + palette tests."""
+    img = Image.new("RGB", (200, 200))
+    for x in range(200):
+        for y in range(200):
+            img.putpixel((x, y), (x, y, 128))
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return base64.b64encode(buf.getvalue()).decode("ascii")
+
+
+def pytest_configure(config: pytest.Config) -> None:
+    config.addinivalue_line(
+        "markers",
+        "integration: live service tests — require Tailscale + running Spark hosts. "
+        "Run with: pytest -m integration",
+    )
diff --git a/tests/test_flux_workflow.py b/tests/test_flux_workflow.py
index f7edf17..84ac15b 100644
--- a/tests/test_flux_workflow.py
+++ b/tests/test_flux_workflow.py
@@ -43,7 +43,7 @@ def test_render_requires_workflow_or_prompt() -> None:
     assert response.status_code == 400
 
 
-def test_render_returns_502_when_spark1_unreachable() -> None:
+def test_render_returns_502_when_gx10_unreachable() -> None:
     class _StubClient:
         def __init__(self, *a, **kw):
             pass
diff --git a/tests/test_healthz.py b/tests/test_healthz.py
index 020af7a..ce90b2b 100644
--- a/tests/test_healthz.py
+++ b/tests/test_healthz.py
@@ -1,4 +1,4 @@
-"""Smoke tests for the FastAPI scaffold."""
+"""Liveness + basic gateway smoke tests."""
 
 from fastapi.testclient import TestClient
 
@@ -8,20 +8,32 @@ client = TestClient(app)
 
 
 def test_healthz_returns_200() -> None:
-    response = client.get("/healthz")
-    assert response.status_code == 200
-    body = response.json()
+    resp = client.get("/healthz")
+    assert resp.status_code == 200
+    body = resp.json()
     assert body["status"] == "ok"
     assert "version" in body
 
 
-def test_palette_extract_returns_501() -> None:
-    # Still a 4a stub — Phase 4c moved only VLM + FLUX, palette/rembg deferred.
-    response = client.post("/palette/extract")
-    assert response.status_code == 501
+def test_all_routes_registered() -> None:
+    """Verify all 4 functional endpoints are mounted (not 404)."""
+    routes = {r.path for r in app.routes}
+    assert "/vlm/analyze" in routes
+    assert "/flux/render" in routes
+    assert "/palette/extract" in routes
+    assert "/rembg/cutout" in routes
 
 
-def test_rembg_cutout_returns_501() -> None:
-    # Still a 4a stub — Phase 4c moved only VLM + FLUX, palette/rembg deferred.
-    response = client.post("/rembg/cutout")
-    assert response.status_code == 501
+def test_vlm_analyze_missing_body_returns_400() -> None:
+    resp = client.post("/vlm/analyze", json={})
+    assert resp.status_code == 400
+
+
+def test_palette_extract_missing_body_returns_400() -> None:
+    resp = client.post("/palette/extract", json={})
+    assert resp.status_code == 400
+
+
+def test_rembg_cutout_missing_body_returns_400() -> None:
+    resp = client.post("/rembg/cutout", json={})
+    assert resp.status_code == 400
diff --git a/tests/test_integration_e2e.py b/tests/test_integration_e2e.py
new file mode 100644
index 0000000..0b59e67
--- /dev/null
+++ b/tests/test_integration_e2e.py
@@ -0,0 +1,276 @@
+"""End-to-end integration tests — hit the live VLM and FLUX hosts via svrnty-vision.
+
+Run with:   pytest -m integration -v
+Skip by default in CI / offline environments.
+
+Hosts required:
+  VLM   — svrnty-steev (Strix Halo) · 100.88.167.87:11434 · qwen3-vl:32b on Ollama
+  FLUX  — gx10-f38f · 100.90.100.10:8188 · ComfyUI + flux2_dev_fp8mixed
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import os
+from decimal import Decimal
+
+import httpx
+import pytest
+from PIL import Image
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+BASE_URL = os.environ.get("SVRNTY_VISION_URL", "http://localhost:8092")
+VLM_HOST = "100.88.167.87"
+FLUX_HOST = "100.90.100.10"
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_png_b64(color: tuple[int, int, int] = (220, 80, 60), size: int = 128) -> str:
+    img = Image.new("RGB", (size, size), color=color)
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return base64.b64encode(buf.getvalue()).decode("ascii")
+
+
+def _host_reachable(host: str, port: int, timeout: float = 2.0) -> bool:
+    import socket
+    try:
+        with socket.create_connection((host, port), timeout=timeout):
+            return True
+    except OSError:
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Skip conditions
+# ---------------------------------------------------------------------------
+
+vlm_available = pytest.mark.skipif(
+    not _host_reachable(VLM_HOST, 11434),
+    reason=f"VLM host {VLM_HOST}:11434 (svrnty-steev Ollama) not reachable",
+)
+
+flux_available = pytest.mark.skipif(
+    not _host_reachable(FLUX_HOST, 8188),
+    reason=f"FLUX host {FLUX_HOST}:8188 (gx10 ComfyUI) not reachable",
+)
+
+_gw = httpx.URL(BASE_URL)  # honour SVRNTY_VISION_URL instead of assuming localhost:8092
+gateway_available = pytest.mark.skipif(
+    not _host_reachable(_gw.host, _gw.port or (443 if _gw.scheme == "https" else 80)),
+    reason=f"svrnty-vision gateway not reachable at {BASE_URL}")
+
+# ---------------------------------------------------------------------------
+# Gateway health
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+def test_gateway_healthz() -> None:
+    resp = httpx.get(f"{BASE_URL}/healthz", timeout=5)
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["status"] == "ok"
+    assert "version" in body
+
+
+# ---------------------------------------------------------------------------
+# VLM — Qwen3-VL 32B on svrnty-steev
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+@vlm_available
+def test_vlm_analyze_raw_mode_returns_text() -> None:
+    """Raw mode: VLM describes the image freely — no score parsing."""
+    resp = httpx.post(
+        f"{BASE_URL}/vlm/analyze",
+        json={
+            "image_base64": _make_png_b64((220, 80, 60)),
+            "brand_context": "Describe what you see in this image.",
+            "rubric_mode": "raw",
+        },
+        timeout=60,
+    )
+    assert resp.status_code == 200, resp.text
+    body = resp.json()
+    assert body["rubric_mode"] == "raw"
+    assert body["brand_fit_score"] is None
+    assert isinstance(body["raw_scores_json"], str)
+    assert len(body["raw_scores_json"]) > 0
+    assert "qwen" in body["model_id"].lower()
+
+
+@pytest.mark.integration
+@gateway_available
+@vlm_available
+def test_vlm_analyze_polished_returns_scores() -> None:
+    """Polished mode: VLM returns brand_fit + visual_polish 0–5 scores."""
+    resp = httpx.post(
+        f"{BASE_URL}/vlm/analyze",
+        json={
+            "image_base64": _make_png_b64((50, 120, 200)),
+            "brand_context": "Modern tech brand — clean, minimal, confident.",
+            "rubric_mode": "polished",
+        },
+        timeout=120,
+    )
+    assert resp.status_code == 200, resp.text
+    body = resp.json()
+    assert body["rubric_mode"] == "polished"
+
+    brand_fit = Decimal(str(body["brand_fit_score"]))
+    visual_polish = Decimal(str(body["visual_polish_score"]))
+    assert Decimal("0") <= brand_fit <= Decimal("5"), f"brand_fit out of range: {brand_fit}"
+    assert Decimal("0") <= visual_polish <= Decimal("5"), f"visual_polish out of range: {visual_polish}"
+    assert isinstance(body["justification"], str)
+    assert len(body["justification"]) > 0
+
+
+@pytest.mark.integration
+@gateway_available
+@vlm_available
+def test_vlm_analyze_ugc_mode() -> None:
+    """UGC mode: same structure as polished, different rubric framing."""
+    resp = httpx.post(
+        f"{BASE_URL}/vlm/analyze",
+        json={
+            "image_base64": _make_png_b64((80, 180, 80)),
+            "brand_context": "Fresh food delivery — organic, home-style.",
+            "rubric_mode": "ugc",
+        },
+        timeout=120,
+    )
+    assert resp.status_code == 200, resp.text
+    body = resp.json()
+    assert body["rubric_mode"] == "ugc"
+    assert body["brand_fit_score"] is not None
+
+
+# ---------------------------------------------------------------------------
+# FLUX — ComfyUI on gx10-f38f
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+@flux_available
+def test_flux_render_returns_valid_png() -> None:
+    """Minimal FLUX render — 4 steps for speed, verifies PNG round-trip."""
+    resp = httpx.post(
+        f"{BASE_URL}/flux/render",
+        json={
+            "prompt": "a plain white circle on black background",
+            "width": 512,
+            "height": 512,
+            "steps": 4,
+            "guidance": 2.5,
+        },
+        timeout=300,
+    )
+    assert resp.status_code == 200, resp.text
+    body = resp.json()
+
+    assert "image_base64" in body
+    assert body["content_type"] == "image/png"
+    assert body["provider"] == "local"
+    assert isinstance(body["duration_ms"], int)
+    assert body["duration_ms"] > 0
+
+    raw = base64.b64decode(body["image_base64"])
+    img = Image.open(io.BytesIO(raw))
+    assert img.width == 512
+    assert img.height == 512
+
+
+@pytest.mark.integration
+@gateway_available
+@flux_available
+def test_flux_render_seeds_produce_different_images() -> None:
+    """Two renders with different prompts → different images (non-trivial output)."""
+    def render(prompt: str) -> bytes:
+        resp = httpx.post(
+            f"{BASE_URL}/flux/render",
+            json={"prompt": prompt, "width": 512, "height": 512, "steps": 4},
+            timeout=300,
+        )
+        assert resp.status_code == 200
+        return base64.b64decode(resp.json()["image_base64"])
+
+    img_a = render("solid red background, nothing else")
+    img_b = render("solid blue background, nothing else")
+    assert img_a != img_b, "Two different prompts produced identical output — likely cached/deduped"
+
+
+# ---------------------------------------------------------------------------
+# Palette — in-process (Pillow)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+def test_palette_extract_live() -> None:
+    """Palette extraction is in-process — always passes when gateway is up."""
+    resp = httpx.post(
+        f"{BASE_URL}/palette/extract",
+        json={"image_base64": _make_png_b64((200, 50, 50)), "color_count": 4},
+        timeout=10,
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    r, g, b = body["dominant"]
+    assert r > 150, "dominant color should be red-dominant"
+    assert body["color_count"] <= 4
+
+
+# ---------------------------------------------------------------------------
+# Rembg — in-process
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+def test_rembg_cutout_live() -> None:
+    """Background removal — always passes when gateway is up (model downloads on first call)."""
+    resp = httpx.post(
+        f"{BASE_URL}/rembg/cutout",
+        json={"image_base64": _make_png_b64()},
+        timeout=120,  # first call downloads u2net ONNX model
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    raw = base64.b64decode(body["image_base64"])
+    img = Image.open(io.BytesIO(raw))
+    assert img.mode == "RGBA"
+
+
+# ---------------------------------------------------------------------------
+# Error surface — gateway must return correct HTTP codes
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+@gateway_available
+def test_vlm_analyze_missing_image_returns_400() -> None:
+    resp = httpx.post(
+        f"{BASE_URL}/vlm/analyze",
+        json={"brand_context": "test", "rubric_mode": "raw"},
+        timeout=10,
+    )
+    assert resp.status_code == 400
+
+
+@pytest.mark.integration
+@gateway_available
+def test_flux_render_missing_prompt_returns_400() -> None:
+    resp = httpx.post(f"{BASE_URL}/flux/render", json={"width": 512, "height": 512}, timeout=10)
+    assert resp.status_code == 400
diff --git a/tests/test_palette.py b/tests/test_palette.py
new file mode 100644
index 0000000..0df12b0
--- /dev/null
+++ b/tests/test_palette.py
@@ -0,0 +1,79 @@
+"""Unit tests for POST /palette/extract."""
+
+from __future__ import annotations
+
+import base64
+import io
+
+import pytest
+from PIL import Image
+
+from svrnty_vision.routers.palette import PaletteRequest, PaletteResponse, extract
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_png_b64(color: tuple[int, int, int], size: int = 50) -> str:
+    """Return a solid-``color`` ``size`` x ``size`` RGB PNG as an ASCII base64 string."""
+    png_stream = io.BytesIO()
+    Image.new("RGB", (size, size), color=color).save(png_stream, format="PNG")
+    return base64.b64encode(png_stream.getvalue()).decode("ascii")
+
+
+# ---------------------------------------------------------------------------
+# Unit tests (pure function / TestClient — no network)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_solid_red_dominant_is_red() -> None:
+    """A pure-red image must yield a dominant color with high R and low G/B."""
+    red_b64 = _make_png_b64((255, 0, 0))
+    result: PaletteResponse = await extract(PaletteRequest(image_base64=red_b64, color_count=3))
+    red, green, blue = result.dominant
+    assert red > 200, "dominant R channel should be high for solid red"
+    assert green < 80 and blue < 80
+
+
+@pytest.mark.asyncio
+async def test_palette_color_count_respected() -> None:
+    """Reported color_count must not exceed the request and must equal len(palette)."""
+    image_b64 = _make_png_b64((0, 128, 255))
+    result = await extract(PaletteRequest(image_base64=image_b64, color_count=4))
+    assert len(result.palette) == result.color_count <= 4
+
+
+@pytest.mark.asyncio
+async def test_palette_each_entry_is_rgb_triple() -> None:
+    """Every palette entry must be three channel values, each within 0..255."""
+    image_b64 = _make_png_b64((100, 200, 50))
+    result = await extract(PaletteRequest(image_base64=image_b64, color_count=6))
+    for swatch in result.palette:
+        assert len(swatch) == 3 and all(0 <= channel <= 255 for channel in swatch)
+
+
+@pytest.mark.asyncio
+async def test_palette_missing_image_raises_400() -> None:
+    import fastapi  # function-local import, kept out of module scope
+    with pytest.raises(fastapi.HTTPException) as rejection:
+        await extract(PaletteRequest())  # request built with no image field set
+    assert rejection.value.status_code == 400
+
+
+@pytest.mark.asyncio
+async def test_palette_bad_base64_raises_400() -> None:
+    import fastapi  # function-local import, kept out of module scope
+    with pytest.raises(fastapi.HTTPException) as rejection:
+        await extract(PaletteRequest(image_base64="!!!notbase64!!!"))
+    assert rejection.value.status_code == 400
+
+
+def test_palette_via_test_client(client, red_png_b64) -> None:
+    """POST /palette/extract through the ``client`` fixture and check the JSON shape."""
+    request_json = {"image_base64": red_png_b64, "color_count": 5}
+    response = client.post("/palette/extract", json=request_json)
+    assert response.status_code == 200
+    payload = response.json()
+    assert "dominant" in payload and len(payload["dominant"]) == 3
+    assert len(payload["palette"]) == payload["color_count"] <= 5
diff --git a/tests/test_rembg.py b/tests/test_rembg.py
new file mode 100644
index 0000000..a197904
--- /dev/null
+++ b/tests/test_rembg.py
@@ -0,0 +1,45 @@
+"""Unit tests for POST /rembg/cutout."""
+
+from __future__ import annotations
+
+import base64
+import io
+
+import pytest
+from PIL import Image
+
+
+def _make_png_b64(color: tuple[int, int, int] = (180, 100, 50), size: int = 64) -> str:
+    """Return a solid-color square PNG as base64. NOTE(review): unused by this module's tests."""
+    png_stream = io.BytesIO()
+    Image.new("RGB", (size, size), color=color).save(png_stream, format="PNG")
+    return base64.b64encode(png_stream.getvalue()).decode("ascii")
+
+
+def test_rembg_missing_image_returns_400(client) -> None:
+    empty_body: dict = {}  # no image_base64 key at all
+    assert client.post("/rembg/cutout", json=empty_body).status_code == 400
+
+
+def test_rembg_bad_base64_returns_400(client) -> None:
+    garbage = "%%%bad%%%"  # not valid base64
+    assert client.post("/rembg/cutout", json={"image_base64": garbage}).status_code == 400
+
+
+def test_rembg_returns_png_with_alpha(client, red_png_b64) -> None:
+    """A successful cutout reports image/png and decodes to an RGBA image."""
+    response = client.post("/rembg/cutout", json={"image_base64": red_png_b64})
+    assert response.status_code == 200
+
+    payload = response.json()
+    assert payload["content_type"] == "image/png"
+    assert "image_base64" in payload
+
+    img = Image.open(io.BytesIO(base64.b64decode(payload["image_base64"])))
+    assert img.mode == "RGBA", f"expected RGBA, got {img.mode}"
+
+
+def test_rembg_alpha_matting_flag_round_trips(client, red_png_b64) -> None:
+    request_json = {"image_base64": red_png_b64, "alpha_matting": False}
+    response = client.post("/rembg/cutout", json=request_json)
+    assert response.status_code == 200 and response.json()["alpha_matting"] is False
diff --git a/tests/test_vlm_parse.py b/tests/test_vlm_parse.py
index c3bd8f3..05a4772 100644
--- a/tests/test_vlm_parse.py
+++ b/tests/test_vlm_parse.py
@@ -1,7 +1,7 @@
 """Pytest port of BTE's FakeVlmEvaluationParseTests + VlmRubric parse coverage.
 
 These tests cover the pure-function side of the VLM router (rubric prompt + score
-parsing). The HTTP call to Spark 2 is exercised separately via TestClient with a
+parsing). The HTTP call to steev (Ollama) is exercised separately via TestClient with a
 mocked httpx transport.
 """
 
@@ -64,11 +64,8 @@ def test_analyze_requires_image_input() -> None:
     assert response.status_code == 400
 
 
-def test_analyze_returns_502_when_spark2_unreachable() -> None:
-    """Smoke: with no Spark 2 (or a failing transport), gateway surfaces 502.
-
-    Uses a mock async client that raises ConnectError on POST.
-    """
+def test_analyze_returns_502_when_steev_unreachable() -> None:
+    """Smoke: with no steev (Ollama) or a failing transport, gateway surfaces 502."""
 
     class _StubClient:
         def __init__(self, *a, **kw):
@@ -95,8 +92,8 @@ def test_analyze_returns_502_when_spark2_unreachable() -> None:
     assert response.status_code == 502
 
 
-def test_analyze_round_trip_with_mocked_spark2() -> None:
-    """Happy path: mock vLLM returns a well-formed score JSON; gateway parses it."""
+def test_analyze_round_trip_with_mocked_steev() -> None:
+    """Happy path: mocked Ollama returns well-formed score JSON; gateway parses it."""
 
     canned_response = {
         "choices": [