feat(svrnty-vision): Phase 4b complete — full impl + e2e test suite

- palette.py + rembg.py: implement from stubs (Pillow median-cut + rembg u2net)
- vlm.py: rename Spark2→steev (Strix Halo / Ollama); bump max_tokens 1024→4096 (qwen3-vl:32b thinking mode consumes budget tokens — 4096 min for valid output)
- settings.py: rename spark2_vlm_*/spark1_flux_* → vlm_*/flux_*; real defaults (steev 100.88.167.87:11434 Ollama, gx10 100.90.100.10:8188 ComfyUI)
- tests/: conftest.py + test_palette.py + test_rembg.py + test_integration_e2e.py (28 unit + 10 integration; 38/38 passing — VLM raw/polished/ugc + FLUX render)
- CLAUDE.md: rewrite to accurate phase status + infra + layout
- requirements.txt + pyproject.toml: add Pillow, rembg, pytest-asyncio deps

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-25 06:44:21 -04:00
parent d567489475
commit f6e09dbff2
15 changed files with 684 additions and 101 deletions
@@ -217,7 +217,7 @@ async def render(req: RenderRequest) -> RenderResponse:
            req.prompt, req.width, req.height, req.guidance, req.steps, req.seed
        )

-    endpoint = settings.spark1_flux_url.rstrip("/")
+    endpoint = settings.flux_url.rstrip("/")
    started = time.monotonic()

    try:
@@ -1,17 +1,69 @@
-"""Palette extraction (ColorThief-equivalent) — stub until Phase 4b."""
+"""Palette extraction — dominant colors via Pillow median-cut quantization."""
+
+from __future__ import annotations
+
+import base64
+import io

 from fastapi import APIRouter, HTTPException, status
+from PIL import Image
+from pydantic import BaseModel, Field

 router = APIRouter(prefix="/palette", tags=["palette"])

+_MAX_DIM = 200  # downsample before quantize for speed

-@router.post("/extract")
-async def extract() -> None:
-    """Extract a dominant-color palette from an image.

-    Phase 4a: stub. Phase 4b: runs in-process (Pillow + colorthief).
-    """
-    raise HTTPException(
-        status_code=status.HTTP_501_NOT_IMPLEMENTED,
-        detail="palette.extract not implemented in Phase 4a — see BTE-REFACTOR-EXECUTION-PLAN Phase 4b",
-    )
+class PaletteRequest(BaseModel):
+    """Request body for `/palette/extract`.
+
+    `image_base64` is effectively required: `image_url` is accepted by the
+    schema, but `_load_image` rejects a URL-only request with HTTP 400.
+    """
+
+    image_base64: str | None = None
+    image_url: str | None = None  # declared but not supported; URL-only requests get 400
+    content_type: str = "image/png"
+    color_count: int = Field(default=6, ge=1, le=32)  # requested palette size, validated to 1..32
+
+
+class PaletteResponse(BaseModel):
+    """Response body for `/palette/extract`: extracted colors as [R, G, B] triples."""
+
+    dominant: list[int] = Field(description="[R, G, B] — single most prominent color")
+    palette: list[list[int]] = Field(description="[[R,G,B], …] — all extracted colors")
+    color_count: int  # number of entries actually returned in `palette`
+
+
+def _load_image(req: PaletteRequest) -> Image.Image:
+    """Decode the request's base64 payload into an RGB Pillow image.
+
+    Args:
+        req: Palette request; only `image_base64` is honoured.
+
+    Returns:
+        The decoded image converted to mode "RGB".
+
+    Raises:
+        HTTPException: 400 when the payload is missing, is not valid base64,
+            cannot be decoded as an image, or when only `image_url` was
+            supplied (URL input is not supported by this endpoint).
+    """
+    if req.image_base64:
+        try:
+            raw = base64.b64decode(req.image_base64)
+        except Exception as e:
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Bad base64: {e}") from e
+        try:
+            # Image.open only reads the header; convert() forces the full decode,
+            # so unrecognised, truncated or corrupt payloads all surface here.
+            return Image.open(io.BytesIO(raw)).convert("RGB")
+        except Exception as e:
+            # Bad client input must be a 400, not an unhandled 500. Pillow raises
+            # several unrelated exception types while decoding, hence the broad catch.
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=f"Bad image: {type(e).__name__}: {e}",
+            ) from e
+    if req.image_url:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="image_url not supported for palette — provide image_base64.",
+        )
+    raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Provide image_base64.")
+
+
+@router.post("/extract", response_model=PaletteResponse)
+async def extract(req: PaletteRequest) -> PaletteResponse:
+    """Extract dominant colors via Pillow median-cut quantization.
+
+    The image is downsampled so its longest side is at most `_MAX_DIM`, then
+    quantized to `req.color_count` colors. The palette entry covering the most
+    pixels is reported as `dominant`.
+
+    Raises:
+        HTTPException: 400 (from `_load_image`) when the request carries no
+            usable image payload.
+    """
+    img = _load_image(req)
+
+    # Downsample for speed before quantizing.
+    if max(img.width, img.height) > _MAX_DIM:
+        img.thumbnail((_MAX_DIM, _MAX_DIM), Image.Resampling.LANCZOS)
+
+    quantized = img.quantize(colors=req.color_count, method=Image.Quantize.MEDIANCUT)
+    raw_palette = quantized.getpalette() or []
+
+    # The palette is a flat [r, g, b, r, g, b, …] list; regroup it into triples.
+    n = min(req.color_count, len(raw_palette) // 3)
+    palette = [list(raw_palette[i : i + 3]) for i in range(0, n * 3, 3)]
+
+    # Most frequent palette index = dominant. On a "P" image getcolors() yields
+    # (pixel_count, palette_index) pairs, so Pillow builds the histogram instead
+    # of a per-pixel Python loop, and it does not need a recent Pillow release.
+    usage = quantized.getcolors() or []
+    dominant_idx = max(usage, key=lambda pair: pair[0])[1] if usage else 0
+    dominant = palette[dominant_idx] if dominant_idx < len(palette) else palette[0]
+
+    return PaletteResponse(dominant=dominant, palette=palette, color_count=len(palette))
@@ -1,17 +1,52 @@
-"""Background removal — stub until Phase 4b."""
+"""Background removal — in-process via rembg (u2net ONNX, CPU-light)."""
+
+from __future__ import annotations
+
+import base64
+import io

 from fastapi import APIRouter, HTTPException, status
+from pydantic import BaseModel

 router = APIRouter(prefix="/rembg", tags=["rembg"])


-@router.post("/cutout")
-async def cutout() -> None:
-    """Remove the background of an image (alpha cutout).
+class CutoutRequest(BaseModel):
+    """`image_base64` must be supplied; this model has no `image_url` field."""

-    Phase 4a: stub. Phase 4b: runs in-process (rembg) or proxies to a Spark service.
-    """
-    raise HTTPException(
-        status_code=status.HTTP_501_NOT_IMPLEMENTED,
-        detail="rembg.cutout not implemented in Phase 4a — see BTE-REFACTOR-EXECUTION-PLAN Phase 4b",
+    image_base64: str | None = None
+    content_type: str = "image/png"
+    alpha_matting: bool = False  # slower but cleaner edges on hair/fur
+
+
+class CutoutResponse(BaseModel):
+    """Response body for `/rembg/cutout`: the background-removed image, base64-encoded."""
+
+    image_base64: str
+    content_type: str = "image/png"
+    alpha_matting: bool  # echoes the `alpha_matting` flag from the request
+
+
+@router.post("/cutout", response_model=CutoutResponse)
+async def cutout(req: CutoutRequest) -> CutoutResponse:
+    """Remove background. Returns PNG with transparency (alpha channel).
+
+    Raises:
+        HTTPException: 400 when `image_base64` is missing or is not valid
+            base64; 502 when rembg fails to import or to process the image.
+    """
+    import asyncio  # local import: keeps this block self-contained
+
+    if not req.image_base64:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Provide image_base64.")
+
+    try:
+        raw = base64.b64decode(req.image_base64)
+    except Exception as e:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Bad base64: {e}") from e
+
+    try:
+        from rembg import remove  # lazy: downloads u2net model on first call
+
+        # remove() is a synchronous call; running it directly inside this
+        # coroutine would stall the event loop (and every other request) for
+        # the whole inference, so hand it to a worker thread instead.
+        result_bytes = await asyncio.to_thread(remove, raw, alpha_matting=req.alpha_matting)
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY,
+            detail=f"rembg failed: {type(e).__name__}: {e}",
+        ) from e
+
+    return CutoutResponse(
+        image_base64=base64.b64encode(result_bytes).decode("ascii"),
+        content_type="image/png",
+        alpha_matting=req.alpha_matting,
    )
@@ -1,4 +1,4 @@
-"""VLM (vision-language model) analysis — proxies to Spark 2 (Qwen3-VL via vLLM).
+"""VLM (vision-language model) analysis — proxies to steev (Qwen3-VL via Ollama).

 Ported from BTE's OpenAiVlmClient.cs + VlmRubric.cs (Phase 4b). Cloud Anthropic
 dialect intentionally dropped — svrnty-vision is sovereign-only.
@@ -34,8 +34,8 @@ class AnalyzeRequest(BaseModel):
    content_type: str = "image/png"
    brand_context: str = ""
    rubric_mode: str = "polished"
-    model: str | None = None  # override settings.spark2_vlm_model
-    max_tokens: int = 1024
+    model: str | None = None  # override settings.vlm_model
+    max_tokens: int = 4096  # qwen3-vl:32b thinking mode uses budget tokens; 4096 min for valid output


 class AnalyzeResponse(BaseModel):
@@ -148,10 +148,10 @@ async def _resolve_data_uri(req: AnalyzeRequest) -> str:

@router.post("/analyze", response_model=AnalyzeResponse)
 async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
-    """Analyze an image with Qwen3-VL on Spark 2 (vLLM, OpenAI-compatible)."""
+    """Analyze an image with Qwen3-VL on steev (Ollama, OpenAI-compatible)."""
    data_uri = await _resolve_data_uri(req)
    rubric = build_rubric_prompt(req.brand_context, req.rubric_mode)
-    model = req.model or settings.spark2_vlm_model
+    model = req.model or settings.vlm_model

    body: dict[str, Any] = {
        "model": model,
@@ -168,7 +168,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
        ],
    }

-    url = settings.spark2_vlm_url.rstrip("/") + "/v1/chat/completions"
+    url = settings.vlm_url.rstrip("/") + "/v1/chat/completions"
    try:
        async with httpx.AsyncClient(
            timeout=settings.vision_request_timeout_seconds
@@ -179,7 +179,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
    except httpx.HTTPError as e:
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
-            detail=f"Spark 2 (vLLM) at {url} unreachable: {type(e).__name__}: {e}",
+            detail=f"steev (Ollama) (vLLM) at {url} unreachable: {type(e).__name__}: {e}",
        ) from e

    try:
@@ -187,7 +187,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
    except (KeyError, IndexError, TypeError) as e:
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
-            detail=f"Spark 2 response shape unexpected: {e}",
+            detail=f"steev (Ollama) response shape unexpected: {e}",
        ) from e

    return parse_scores(text, req.rubric_mode, model)
@@ -14,14 +14,18 @@ class Settings(BaseSettings):

    # Server
    svrnty_vision_host: str = "0.0.0.0"
-    svrnty_vision_port: int = 8090
+    svrnty_vision_port: int = 8092

-    # Spark 1 — FLUX (ComfyUI)
-    spark1_flux_url: str = "http://spark1.lan:8188"
+    # FLUX image generation — ComfyUI on gx10-f38f (100.90.100.10, NVIDIA GB10)
+    # Models required: diffusion_models/flux2_dev_fp8mixed.safetensors
+    #                  text_encoders/mistral_3_small_flux2_fp8.safetensors
+    #                  vae/flux2-vae.safetensors
+    flux_url: str = "http://100.90.100.10:8188"

-    # Spark 2 — Qwen3-VL (vLLM, OpenAI-compatible)
-    spark2_vlm_url: str = "http://spark2.lan:8000"
-    spark2_vlm_model: str = "Qwen/Qwen3-VL-7B-Instruct"
+    # VLM analysis — Qwen3-VL 32B via Ollama on svrnty-steev (Strix Halo, this machine)
+    # OpenAI-compatible endpoint; no /v1 suffix here — router appends it.
+    vlm_url: str = "http://100.88.167.87:11434"
+    vlm_model: str = "qwen3-vl:32b"

    # Common
    vision_request_timeout_seconds: int = 120