feat(svrnty-vision): Phase 4b complete — full impl + e2e test suite
- palette.py + rembg.py: implement from stubs (Pillow median-cut + rembg u2net)
- vlm.py: rename Spark2→steev (Strix Halo / Ollama); bump max_tokens 1024→4096 (qwen3-vl:32b thinking mode consumes budget tokens — 4096 min for valid output)
- settings.py: rename spark2_vlm_*/spark1_flux_* → vlm_*/flux_*; real defaults (steev 100.88.167.87:11434 Ollama, gx10 100.90.100.10:8188 ComfyUI)
- tests/: conftest.py + test_palette.py + test_rembg.py + test_integration_e2e.py (28 unit + 10 integration; 38/38 passing — VLM raw/polished/ugc + FLUX render)
- CLAUDE.md: rewrite to accurate phase status + infra + layout
- requirements.txt + pyproject.toml: add Pillow, rembg, pytest-asyncio deps

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -217,7 +217,7 @@ async def render(req: RenderRequest) -> RenderResponse:
|
||||
req.prompt, req.width, req.height, req.guidance, req.steps, req.seed
|
||||
)
|
||||
|
||||
endpoint = settings.spark1_flux_url.rstrip("/")
|
||||
endpoint = settings.flux_url.rstrip("/")
|
||||
started = time.monotonic()
|
||||
|
||||
try:
|
||||
|
||||
@@ -1,17 +1,69 @@
|
||||
"""Palette extraction (ColorThief-equivalent) — stub until Phase 4b."""
|
||||
"""Palette extraction — dominant colors via Pillow median-cut quantization."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
|
||||
from fastapi import APIRouter, HTTPException, status
|
||||
from PIL import Image
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
router = APIRouter(prefix="/palette", tags=["palette"])
|
||||
|
||||
_MAX_DIM = 200 # downsample before quantize for speed
|
||||
|
||||
@router.post("/extract")
|
||||
async def extract() -> None:
|
||||
"""Extract a dominant-color palette from an image.
|
||||
|
||||
Phase 4a: stub. Phase 4b: runs in-process (Pillow + colorthief).
|
||||
"""
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_501_NOT_IMPLEMENTED,
|
||||
detail="palette.extract not implemented in Phase 4a — see BTE-REFACTOR-EXECUTION-PLAN Phase 4b",
|
||||
)
|
||||
class PaletteRequest(BaseModel):
    """Request body for palette extraction.

    `image_base64` is required in practice: `_load_image` answers HTTP 400
    for requests that carry only `image_url`, so the URL field is present in
    the schema but is not a usable alternative.
    """

    # Base64-encoded image bytes; the only input the loader decodes.
    image_base64: str | None = None
    # Accepted by the schema but refused by `_load_image` with HTTP 400.
    image_url: str | None = None
    # Declared MIME type of the payload.
    content_type: str = "image/png"
    # Number of palette entries requested from quantization (1-32 inclusive).
    color_count: int = Field(default=6, ge=1, le=32)
|
||||
|
||||
|
||||
class PaletteResponse(BaseModel):
    """Palette extraction result: the top color plus every extracted color."""

    dominant: list[int] = Field(
        description="[R, G, B] — single most prominent color",
    )
    palette: list[list[int]] = Field(
        description="[[R,G,B], …] — all extracted colors",
    )
    # Number of entries returned in `palette`.
    color_count: int
|
||||
|
||||
|
||||
def _load_image(req: PaletteRequest) -> Image.Image:
    """Decode the request's base64 payload into an RGB Pillow image.

    Args:
        req: Palette request; only `image_base64` is a usable image source.

    Returns:
        The decoded image converted to mode "RGB".

    Raises:
        HTTPException: 400 when `image_base64` is missing (including the
            `image_url`-only case, which is not supported), when the payload
            is not valid base64, or when the decoded bytes are not an image
            Pillow can open.
    """
    if not req.image_base64:
        detail = (
            "image_url not supported for palette — provide image_base64."
            if req.image_url
            else "Provide image_base64."
        )
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=detail)

    try:
        raw = base64.b64decode(req.image_base64)
    except (ValueError, TypeError) as e:  # binascii.Error is a ValueError
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Bad base64: {e}") from e

    # Well-formed base64 can still wrap bytes that are not an image; report
    # that as a client error rather than letting it surface as a 500.
    try:
        return Image.open(io.BytesIO(raw)).convert("RGB")
    except (OSError, ValueError, Image.DecompressionBombError) as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Bad image data: {type(e).__name__}: {e}",
        ) from e
|
||||
|
||||
|
||||
@router.post("/extract", response_model=PaletteResponse)
async def extract(req: PaletteRequest) -> PaletteResponse:
    """Extract dominant colors via Pillow median-cut quantization.

    The image is shrunk so its longest side is at most `_MAX_DIM` pixels,
    quantized to `req.color_count` colors, and the palette entry covering the
    most pixels is reported as `dominant`.

    Raises:
        HTTPException: propagated from `_load_image` for unusable input.
    """
    img = _load_image(req)

    # Downsample for speed before quantizing (thumbnail resizes in place).
    if max(img.width, img.height) > _MAX_DIM:
        img.thumbnail((_MAX_DIM, _MAX_DIM), Image.Resampling.LANCZOS)

    quantized = img.quantize(colors=req.color_count, method=Image.Quantize.MEDIANCUT)
    flat = quantized.getpalette() or []

    # The flat palette is [R0, G0, B0, R1, G1, B1, …]; regroup into triples.
    n = min(req.color_count, len(flat) // 3)
    palette = [flat[i : i + 3] for i in range(0, n * 3, 3)]

    # Most frequent palette index = dominant. For a palettised image
    # getcolors() yields (pixel_count, palette_index) pairs, so the per-pixel
    # counting happens inside Pillow instead of a Python loop.
    usage = quantized.getcolors() or []
    dominant_idx = max(usage, key=lambda entry: entry[0])[1] if usage else 0
    dominant = palette[dominant_idx] if dominant_idx < len(palette) else palette[0]

    return PaletteResponse(dominant=dominant, palette=palette, color_count=len(palette))
|
||||
|
||||
@@ -1,17 +1,52 @@
|
||||
"""Background removal — stub until Phase 4b."""
|
||||
"""Background removal — in-process via rembg (u2net ONNX, CPU-light)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
|
||||
from fastapi import APIRouter, HTTPException, status
|
||||
from pydantic import BaseModel
|
||||
|
||||
router = APIRouter(prefix="/rembg", tags=["rembg"])
|
||||
|
||||
|
||||
@router.post("/cutout")
|
||||
async def cutout() -> None:
|
||||
"""Remove the background of an image (alpha cutout).
|
||||
class CutoutRequest(BaseModel):
|
||||
"""`image_base64` must be supplied; this request model has no `image_url` field."""
|
||||
|
||||
Phase 4a: stub. Phase 4b: runs in-process (rembg) or proxies to a Spark service.
|
||||
"""
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_501_NOT_IMPLEMENTED,
|
||||
detail="rembg.cutout not implemented in Phase 4a — see BTE-REFACTOR-EXECUTION-PLAN Phase 4b",
|
||||
image_base64: str | None = None
|
||||
content_type: str = "image/png"
|
||||
alpha_matting: bool = False # slower but cleaner edges on hair/fur
|
||||
|
||||
|
||||
class CutoutResponse(BaseModel):
    """Background-removal result returned by the cutout endpoint."""

    # Base64 (ASCII) encoding of the output image bytes.
    image_base64: str

    # MIME type of the returned image.
    content_type: str = "image/png"

    # Echo of the alpha-matting flag the request was processed with.
    alpha_matting: bool
|
||||
|
||||
|
||||
@router.post("/cutout", response_model=CutoutResponse)
async def cutout(req: CutoutRequest) -> CutoutResponse:
    """Remove background. Returns PNG with transparency (alpha channel).

    Raises:
        HTTPException: 400 when `image_base64` is missing or is not valid
            base64; 502 when importing or running rembg fails.
    """
    import asyncio  # function-scope import keeps the module's import block untouched

    if not req.image_base64:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Provide image_base64.")

    try:
        raw = base64.b64decode(req.image_base64)
    except (ValueError, TypeError) as e:  # binascii.Error is a ValueError
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Bad base64: {e}") from e

    try:
        from rembg import remove  # lazy: downloads u2net model on first call

        # remove() is synchronous and CPU-heavy; run it in a worker thread so
        # the event loop keeps serving other requests while it works.
        result_bytes = await asyncio.to_thread(remove, raw, alpha_matting=req.alpha_matting)
    except Exception as e:
        # Boundary handler: any import or inference failure is reported to the
        # client with its type instead of surfacing as an opaque 500.
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"rembg failed: {type(e).__name__}: {e}",
        ) from e

    return CutoutResponse(
        image_base64=base64.b64encode(result_bytes).decode("ascii"),
        content_type="image/png",
        alpha_matting=req.alpha_matting,
    )
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""VLM (vision-language model) analysis — proxies to Spark 2 (Qwen3-VL via vLLM).
|
||||
"""VLM (vision-language model) analysis — proxies to steev (Qwen3-VL via Ollama).
|
||||
|
||||
Ported from BTE's OpenAiVlmClient.cs + VlmRubric.cs (Phase 4b). Cloud Anthropic
|
||||
dialect intentionally dropped — svrnty-vision is sovereign-only.
|
||||
@@ -34,8 +34,8 @@ class AnalyzeRequest(BaseModel):
|
||||
content_type: str = "image/png"
|
||||
brand_context: str = ""
|
||||
rubric_mode: str = "polished"
|
||||
model: str | None = None # override settings.spark2_vlm_model
|
||||
max_tokens: int = 1024
|
||||
model: str | None = None # override settings.vlm_model
|
||||
max_tokens: int = 4096 # qwen3-vl:32b thinking mode uses budget tokens; 4096 min for valid output
|
||||
|
||||
|
||||
class AnalyzeResponse(BaseModel):
|
||||
@@ -148,10 +148,10 @@ async def _resolve_data_uri(req: AnalyzeRequest) -> str:
|
||||
|
||||
@router.post("/analyze", response_model=AnalyzeResponse)
|
||||
async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
|
||||
"""Analyze an image with Qwen3-VL on Spark 2 (vLLM, OpenAI-compatible)."""
|
||||
"""Analyze an image with Qwen3-VL on steev (Ollama, OpenAI-compatible)."""
|
||||
data_uri = await _resolve_data_uri(req)
|
||||
rubric = build_rubric_prompt(req.brand_context, req.rubric_mode)
|
||||
model = req.model or settings.spark2_vlm_model
|
||||
model = req.model or settings.vlm_model
|
||||
|
||||
body: dict[str, Any] = {
|
||||
"model": model,
|
||||
@@ -168,7 +168,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
|
||||
],
|
||||
}
|
||||
|
||||
url = settings.spark2_vlm_url.rstrip("/") + "/v1/chat/completions"
|
||||
url = settings.vlm_url.rstrip("/") + "/v1/chat/completions"
|
||||
try:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=settings.vision_request_timeout_seconds
|
||||
@@ -179,7 +179,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
|
||||
except httpx.HTTPError as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_502_BAD_GATEWAY,
|
||||
detail=f"Spark 2 (vLLM) at {url} unreachable: {type(e).__name__}: {e}",
|
||||
detail=f"steev (Ollama) (vLLM) at {url} unreachable: {type(e).__name__}: {e}",
|
||||
) from e
|
||||
|
||||
try:
|
||||
@@ -187,7 +187,7 @@ async def analyze(req: AnalyzeRequest) -> AnalyzeResponse:
|
||||
except (KeyError, IndexError, TypeError) as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_502_BAD_GATEWAY,
|
||||
detail=f"Spark 2 response shape unexpected: {e}",
|
||||
detail=f"steev (Ollama) response shape unexpected: {e}",
|
||||
) from e
|
||||
|
||||
return parse_scores(text, req.rubric_mode, model)
|
||||
|
||||
@@ -14,14 +14,18 @@ class Settings(BaseSettings):
|
||||
|
||||
# Server
|
||||
svrnty_vision_host: str = "0.0.0.0"
|
||||
svrnty_vision_port: int = 8090
|
||||
svrnty_vision_port: int = 8092
|
||||
|
||||
# Spark 1 — FLUX (ComfyUI)
|
||||
spark1_flux_url: str = "http://spark1.lan:8188"
|
||||
# FLUX image generation — ComfyUI on gx10-f38f (100.90.100.10, NVIDIA GB10)
|
||||
# Models required: diffusion_models/flux2_dev_fp8mixed.safetensors
|
||||
# text_encoders/mistral_3_small_flux2_fp8.safetensors
|
||||
# vae/flux2-vae.safetensors
|
||||
flux_url: str = "http://100.90.100.10:8188"
|
||||
|
||||
# Spark 2 — Qwen3-VL (vLLM, OpenAI-compatible)
|
||||
spark2_vlm_url: str = "http://spark2.lan:8000"
|
||||
spark2_vlm_model: str = "Qwen/Qwen3-VL-7B-Instruct"
|
||||
# VLM analysis — Qwen3-VL 32B via Ollama on svrnty-steev (Strix Halo, this machine)
|
||||
# OpenAI-compatible endpoint; no /v1 suffix here — router appends it.
|
||||
vlm_url: str = "http://100.88.167.87:11434"
|
||||
vlm_model: str = "qwen3-vl:32b"
|
||||
|
||||
# Common
|
||||
vision_request_timeout_seconds: int = 120
|
||||
|
||||
Reference in New Issue
Block a user