feat(svrnty-vision): Phase 4b complete — full impl + e2e test suite

- palette.py + rembg.py: implement from stubs (Pillow median-cut + rembg u2net)
- vlm.py: rename Spark2→steev (Strix Halo / Ollama); bump max_tokens 1024→4096
  (qwen3-vl:32b thinking mode consumes budget tokens — 4096 min for valid output)
- settings.py: rename spark2_vlm_*/spark1_flux_* → vlm_*/flux_*; real defaults
  (steev 100.88.167.87:11434 Ollama, gx10 100.90.100.10:8188 ComfyUI)
- tests/: conftest.py + test_palette.py + test_rembg.py + test_integration_e2e.py
  (28 unit + 10 integration; 38/38 passing — VLM raw/polished/ugc + FLUX render)
- CLAUDE.md: rewrite to accurate phase status + infra + layout
- requirements.txt + pyproject.toml: add Pillow, rembg, pytest-asyncio deps

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Svrnty
2026-05-25 06:44:21 -04:00
parent d567489475
commit f6e09dbff2
15 changed files with 684 additions and 101 deletions
+1 -1
View File
@@ -217,7 +217,7 @@ async def render(req: RenderRequest) -> RenderResponse:
req.prompt, req.width, req.height, req.guidance, req.steps, req.seed
)
endpoint = settings.spark1_flux_url.rstrip("/")
endpoint = settings.flux_url.rstrip("/")
started = time.monotonic()
try:
+62 -10
View File
@@ -1,17 +1,69 @@
"""Palette extraction (ColorThief-equivalent) — stub until Phase 4b."""
"""Palette extraction — dominant colors via Pillow median-cut quantization."""
from __future__ import annotations
import base64
import io
from fastapi import APIRouter, HTTPException, status
from PIL import Image
from pydantic import BaseModel, Field
# Router for the palette endpoints; routes registered on it are served under the /palette prefix.
router = APIRouter(prefix="/palette", tags=["palette"])
# Upper bound, in pixels, on the longest image side before quantization: extract()
# thumbnails anything larger down to fit inside a _MAX_DIM x _MAX_DIM box.
_MAX_DIM = 200 # downsample before quantize for speed
@router.post("/extract")
async def extract() -> None:
"""Extract a dominant-color palette from an image.
Phase 4a: stub. Phase 4b: runs in-process (Pillow + colorthief).
"""
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="palette.extract not implemented in Phase 4a — see BTE-REFACTOR-EXECUTION-PLAN Phase 4b",
)
class PaletteRequest(BaseModel):
    """Request body for palette extraction.

    `image_base64` is the only usable image source: `_load_image` rejects a
    request that carries only `image_url` (or no image at all) with HTTP 400.
    """

    # Base64-encoded image bytes; decoded with base64.b64decode and opened with Pillow.
    image_base64: str | None = None
    # Accepted by the schema, but not supported for palette extraction (always answered with 400).
    image_url: str | None = None
    # NOTE(review): not read by the palette code visible here — presumably informational; confirm.
    content_type: str = "image/png"
    # Number of colors requested from the quantizer; validated to the range 1..32.
    color_count: int = Field(default=6, ge=1, le=32)
class PaletteResponse(BaseModel):
    """Response body for palette extraction.

    `dominant` is one entry of `palette` (the most frequent quantized color),
    and `color_count` is the number of entries actually present in `palette`.
    """

    dominant: list[int] = Field(description="[R, G, B] — single most prominent color")
    palette: list[list[int]] = Field(description="[[R,G,B], …] — all extracted colors")
    # Set by extract() to len(palette), which may be smaller than the requested count.
    color_count: int
def _load_image(req: PaletteRequest) -> Image.Image:
    """Decode the request's base64 payload into an RGB Pillow image.

    Args:
        req: Palette request. Only `image_base64` is a usable image source.

    Returns:
        The decoded image, converted to RGB mode.

    Raises:
        HTTPException: 400 when the base64 text is malformed, when the decoded
            bytes are not a readable image, when only `image_url` is supplied
            (not supported for palette extraction), or when no image is
            supplied at all.
    """
    if req.image_base64:
        try:
            raw = base64.b64decode(req.image_base64)
        except Exception as e:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Bad base64: {e}") from e
        try:
            return Image.open(io.BytesIO(raw)).convert("RGB")
        except (OSError, ValueError, Image.DecompressionBombError) as e:
            # Valid base64 that is not a decodable image (unknown format,
            # truncated or corrupt data, oversized image) is a client error;
            # without this guard it would surface as an unhandled 500.
            # PIL.UnidentifiedImageError is a subclass of OSError.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Bad image data: {type(e).__name__}: {e}",
            ) from e
    if req.image_url:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="image_url not supported for palette — provide image_base64.",
        )
    raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Provide image_base64.")
@router.post("/extract", response_model=PaletteResponse)
async def extract(req: PaletteRequest) -> PaletteResponse:
    """Extract dominant colors via Pillow median-cut quantization.

    The image is thumbnailed so that neither side exceeds `_MAX_DIM`, then
    quantized to at most `req.color_count` colors. The palette entry used by
    the largest number of pixels is reported as the dominant color.

    Args:
        req: Palette request carrying the base64 image and the color count.

    Returns:
        The dominant color, the extracted palette, and the palette length.

    Raises:
        HTTPException: 400 for unusable input (raised by `_load_image`);
            500 when quantization produces no palette entries.
    """
    from collections import Counter  # function-scope import keeps this block self-contained

    img = _load_image(req)

    # Downsample for speed before quantizing.
    if max(img.width, img.height) > _MAX_DIM:
        img.thumbnail((_MAX_DIM, _MAX_DIM), Image.Resampling.LANCZOS)

    quantized = img.quantize(colors=req.color_count, method=Image.Quantize.MEDIANCUT)

    # getpalette() is a flat [R, G, B, R, G, B, ...] list; keep whole triples only.
    raw_palette = quantized.getpalette() or []
    n = min(req.color_count, len(raw_palette) // 3)
    palette = [raw_palette[3 * i : 3 * i + 3] for i in range(n)]
    if not palette:
        # Previously this fell through to palette[0] and raised a bare
        # IndexError; fail with the same status but an explicit message.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Quantization produced an empty palette.",
        )

    # Most frequent palette index = dominant. Counter keeps first-seen order,
    # so ties resolve to the index encountered first, as the manual loop did.
    counts = Counter(quantized.get_flattened_data())
    dominant_idx = counts.most_common(1)[0][0] if counts else 0
    dominant = palette[dominant_idx] if dominant_idx < len(palette) else palette[0]

    return PaletteResponse(dominant=dominant, palette=palette, color_count=len(palette))
+44 -9
View File
@@ -1,17 +1,52 @@
"""Background removal — stub until Phase 4b."""
"""Background removal — in-process via rembg (u2net ONNX, CPU-light)."""
from __future__ import annotations
import base64
import io
from fastapi import APIRouter, HTTPException, status
from pydantic import BaseModel
# Router for the background-removal endpoints; routes registered on it are served under the /rembg prefix.
router = APIRouter(prefix="/rembg", tags=["rembg"])
@router.post("/cutout")
async def cutout() -> None:
"""Remove the background of an image (alpha cutout).
class CutoutRequest(BaseModel):
"""At least one of `image_base64` or `image_url` must be supplied."""
Phase 4a: stub. Phase 4b: runs in-process (rembg) or proxies to a Spark service.
"""
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="rembg.cutout not implemented in Phase 4a — see BTE-REFACTOR-EXECUTION-PLAN Phase 4b",
image_base64: str | None = None
content_type: str = "image/png"
alpha_matting: bool = False # slower but cleaner edges on hair/fur
class CutoutResponse(BaseModel):
    """Response body for the background-removal endpoint."""

    # Base64 (ASCII) encoding of the bytes returned by rembg.
    image_base64: str
    # cutout() always sets this to "image/png".
    content_type: str = "image/png"
    # Echo of the `alpha_matting` flag from the request.
    alpha_matting: bool
@router.post("/cutout", response_model=CutoutResponse)
async def cutout(req: CutoutRequest) -> CutoutResponse:
    """Strip the background from the supplied image.

    The base64 payload is decoded, passed through rembg, and the result is
    returned base64-encoded and labelled as image/png (a PNG with an alpha
    channel, per this endpoint's contract).

    Args:
        req: Cutout request; `image_base64` is mandatory, `alpha_matting`
            is forwarded to rembg unchanged.

    Returns:
        The encoded cutout plus an echo of the `alpha_matting` flag.

    Raises:
        HTTPException: 400 when `image_base64` is missing or is not valid
            base64; 502 when importing or running rembg fails.
    """
    encoded_input = req.image_base64
    if not encoded_input:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Provide image_base64.",
        )

    try:
        image_bytes = base64.b64decode(encoded_input)
    except Exception as exc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Bad base64: {exc}",
        ) from exc

    use_matting = req.alpha_matting
    try:
        # Deferred import, kept inside the guarded region so an import failure
        # is reported the same way as a processing failure. Per the original
        # note, the u2net model is downloaded on first call.
        from rembg import remove

        cutout_bytes = remove(image_bytes, alpha_matting=use_matting)
    except Exception as exc:
        failure = f"rembg failed: {type(exc).__name__}: {exc}"
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=failure) from exc

    encoded_output = base64.b64encode(cutout_bytes).decode("ascii")
    return CutoutResponse(
        image_base64=encoded_output,
        content_type="image/png",
        alpha_matting=use_matting,
    )
+8 -8
View File
@@ -1,4 +1,4 @@
"""VLM (vision-language model) analysis — proxies to Spark 2 (Qwen3-VL via vLLM).
"""VLM (vision-language model) analysis — proxies to steev (Qwen3-VL via Ollama).
Ported from BTE's OpenAiVlmClient.cs + VlmRubric.cs (Phase 4b). Cloud Anthropic
dialect intentionally dropped — svrnty-vision is sovereign-only.
@@ -34,8 +34,8 @@ class AnalyzeRequest(BaseModel):
content_type: str = "image/png"
brand_context: str = ""
rubric_mode: str = "polished"
model: str | None = None # override settings.spark2_vlm_model
max_tokens: int = 1024
model: str | None = None # override settings.vlm_model
max_tokens: int = 4096 # qwen3-vl:32b thinking mode uses budget tokens; 4096 min for valid output
class AnalyzeResponse(BaseModel):
@@ -148,10 +148,10 @@ async def _resolve_data_uri(req: AnalyzeRequest) -> str:
@router.post("/analyze", response_model=AnalyzeResponse)
async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
"""Analyze an image with Qwen3-VL on Spark 2 (vLLM, OpenAI-compatible)."""
"""Analyze an image with Qwen3-VL on steev (Ollama) (vLLM, OpenAI-compatible)."""
data_uri = await _resolve_data_uri(req)
rubric = build_rubric_prompt(req.brand_context, req.rubric_mode)
model = req.model or settings.spark2_vlm_model
model = req.model or settings.vlm_model
body: dict[str, Any] = {
"model": model,
@@ -168,7 +168,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
],
}
url = settings.spark2_vlm_url.rstrip("/") + "/v1/chat/completions"
url = settings.vlm_url.rstrip("/") + "/v1/chat/completions"
try:
async with httpx.AsyncClient(
timeout=settings.vision_request_timeout_seconds
@@ -179,7 +179,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
except httpx.HTTPError as e:
raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"Spark 2 (vLLM) at {url} unreachable: {type(e).__name__}: {e}",
detail=f"steev (Ollama) (vLLM) at {url} unreachable: {type(e).__name__}: {e}",
) from e
try:
@@ -187,7 +187,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
except (KeyError, IndexError, TypeError) as e:
raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"Spark 2 response shape unexpected: {e}",
detail=f"steev (Ollama) response shape unexpected: {e}",
) from e
return parse_scores(text, req.rubric_mode, model)
+10 -6
View File
@@ -14,14 +14,18 @@ class Settings(BaseSettings):
# Server
svrnty_vision_host: str = "0.0.0.0"
svrnty_vision_port: int = 8090
svrnty_vision_port: int = 8092
# Spark 1 — FLUX (ComfyUI)
spark1_flux_url: str = "http://spark1.lan:8188"
# FLUX image generation — ComfyUI on gx10-f38f (100.90.100.10, NVIDIA GB10)
# Models required: diffusion_models/flux2_dev_fp8mixed.safetensors
# text_encoders/mistral_3_small_flux2_fp8.safetensors
# vae/flux2-vae.safetensors
flux_url: str = "http://100.90.100.10:8188"
# Spark 2 — Qwen3-VL (vLLM, OpenAI-compatible)
spark2_vlm_url: str = "http://spark2.lan:8000"
spark2_vlm_model: str = "Qwen/Qwen3-VL-7B-Instruct"
# VLM analysis — Qwen3-VL 32B via Ollama on svrnty-steev (Strix Halo, this machine)
# OpenAI-compatible endpoint; no /v1 suffix here — router appends it.
vlm_url: str = "http://100.88.167.87:11434"
vlm_model: str = "qwen3-vl:32b"
# Common
vision_request_timeout_seconds: int = 120