svrnty-hermes-webui-plugin/tests/unit/test_transcribe.py

"""Unit tests for routes/transcribe.py (P3.B + L6).

Cover the route handler shape + the audio_attachment_processor contract.
Network calls to the external STT endpoint are mocked.
"""
import json
import os
from unittest.mock import MagicMock, patch

from routes import transcribe


class _FakeHandler:
    def __init__(self, body=b"", headers=None):
        self.status = None
        self.headers = headers or {}
        self.body_out = b""
        self.rfile = MagicMock()
        self.rfile.read.return_value = body

    def send_response(self, code):
        self.status = code

    def send_header(self, k, v):
        pass

    def end_headers(self):
        pass

    @property
    def wfile(self):
        h = self

        class _W:
            def write(self_, b): h.body_out += b
        return _W()


def test_register_wires_route_and_processor():
    """register() hooks up both the POST route and the attachment processor."""
    fake_api = MagicMock()
    fake_api.logger.return_value = MagicMock()

    transcribe.register(fake_api)

    # Exactly one route registration, pointing at the module's handler.
    expected_route = ("/api/transcribe", "POST", transcribe._handle_transcribe)
    fake_api.register_route.assert_called_once_with(*expected_route)

    # Exactly one processor registration, pointing at the module's processor.
    processor_hook = fake_api.register_audio_attachment_processor
    processor_hook.assert_called_once_with(
        transcribe._transcribe_audio_attachments)


def test_processor_returns_empty_when_stt_url_unset():
    """With HERMES_WEBUI_STT_URL set to "" the processor returns ""."""
    audio = [{"path": "/tmp/foo.webm", "mime": "audio/webm"}]
    blank_env = {"HERMES_WEBUI_STT_URL": ""}
    with patch.dict(os.environ, blank_env, clear=False):
        result = transcribe._transcribe_audio_attachments(audio)
        assert result == ""


def test_processor_returns_empty_when_no_audio_attachments():
    """An empty list, or a list holding only a PDF, yields no transcript."""
    stt_env = {"HERMES_WEBUI_STT_URL": "http://stt:8000/transcribe"}
    non_audio_inputs = (
        [],
        [{"path": "/tmp/doc.pdf", "mime": "application/pdf"}],
    )
    with patch.dict(os.environ, stt_env):
        for attachments in non_audio_inputs:
            assert transcribe._transcribe_audio_attachments(attachments) == ""


def test_processor_transcribes_audio_attachments():
    """End-to-end: audio attachment → STT call → transcript block."""
    voice_note = {
        "path": "/tmp/voice-message-123.webm",
        "mime": "audio/webm",
        "name": "voice-message-123.webm",
    }
    stt_env = {"HERMES_WEBUI_STT_URL": "http://stt:8000/v1/audio/transcriptions"}
    # The external STT call is stubbed so no network traffic happens.
    stub_stt = patch.object(
        transcribe, "_external_stt_transcribe", return_value="hello world")

    with patch.dict(os.environ, stt_env), stub_stt:
        transcript = transcribe._transcribe_audio_attachments([voice_note])

    assert transcript.startswith("[Voice message transcript]")
    assert "hello world" in transcript


def test_processor_detects_audio_by_filename_prefix():
    """voice-message-* prefix triggers transcription even with non-audio mime."""
    recording = {
        "path": "/tmp/voice-message-abc.mp4",
        # browser may upload as video/* per upload handler
        "mime": "video/mp4",
        "name": "voice-message-abc.mp4",
    }
    stt_env = {"HERMES_WEBUI_STT_URL": "http://stt:8000/v1"}
    # Stub the external STT call; only the detection logic is under test.
    stub_stt = patch.object(
        transcribe, "_external_stt_transcribe", return_value="hi")

    with patch.dict(os.environ, stt_env), stub_stt:
        transcript = transcribe._transcribe_audio_attachments([recording])
        assert "hi" in transcript


def test_handle_transcribe_503_when_stt_url_missing():
    """The route responds 503 when HERMES_WEBUI_STT_URL is an empty string."""
    handler = _FakeHandler()
    blank_env = {"HERMES_WEBUI_STT_URL": ""}
    with patch.dict(os.environ, blank_env, clear=False):
        transcribe._handle_transcribe(handler, None)
    assert handler.status == 503


def test_handle_transcribe_400_on_non_multipart():
    """A JSON (non-multipart) request is rejected with 400."""
    json_headers = {"Content-Type": "application/json", "Content-Length": "10"}
    handler = _FakeHandler(headers=json_headers)
    stt_env = {"HERMES_WEBUI_STT_URL": "http://stt:8000/v1"}
    with patch.dict(os.environ, stt_env):
        transcribe._handle_transcribe(handler, None)
    assert handler.status == 400


def test_multipart_parser_extracts_file_field():
    """_parse_multipart_file pulls the named field's bytes + filename."""
    boundary = "----boundary"
    content_type = f"multipart/form-data; boundary={boundary}"

    # One form part named "file", followed by the closing boundary marker.
    segments = [
        f"--{boundary}\r\n",
        'Content-Disposition: form-data; name="file"; filename="hello.wav"\r\n',
        "Content-Type: audio/wav\r\n\r\n",
        "FAKEAUDIO\r\n",
        f"--{boundary}--\r\n",
    ]
    payload = "".join(segments).encode()

    parsed = transcribe._parse_multipart_file(payload, content_type, "file")
    file_bytes, filename = parsed

    assert file_bytes == b"FAKEAUDIO"
    assert filename == "hello.wav"