swift-apple-intelligence-grpc/Sources/AppleIntelligenceCore/Providers/AppleIntelligenceProvider.swift
Commit b754945923 by Mathias Beaulieu-Duncan: Add Text-to-Speech and Speech-to-Text features
- Add TTS service using AVSpeechSynthesizer for voice output
- Add STT service using SpeechAnalyzer (macOS 26) for transcription
- Add voice input (microphone) button in chat with recording level indicator
- Add speak button on assistant messages for TTS playback
- Add language toggle (EN-CA/FR-CA) for bilingual speech recognition
- Fix Swift 6 strict concurrency issues in audio callbacks
- Update proto schema with TTS/STT message types and RPCs
- Update gRPC provider with speech service endpoints


import Foundation
import GRPCCore
import GRPCProtobuf
import GRPCNIOTransportHTTP2
/// gRPC service provider for Apple Intelligence
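///
/// Example (sketch) of serving this provider over HTTP/2. The `GRPCServer` and
/// `HTTP2ServerTransport.Posix` spellings assume the grpc-swift 2 API, and the
/// `AppleIntelligenceService()` initializer is assumed; adjust both to match
/// this project:
///
/// ```swift
/// let provider = AppleIntelligenceProvider(
///     service: AppleIntelligenceService(),
///     apiKey: "my-secret-key"
/// )
/// let server = GRPCServer(
///     transport: HTTP2ServerTransport.Posix(
///         address: .ipv4(host: "127.0.0.1", port: 50051),
///         transportSecurity: .plaintext
///     ),
///     services: [provider]
/// )
/// try await server.serve()
/// ```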
public struct AppleIntelligenceProvider: Appleintelligence_AppleIntelligenceService.ServiceProtocol {
/// The underlying AI service
private let service: AppleIntelligenceService
/// Text-to-Speech service
private let ttsService: TextToSpeechService?
/// Speech-to-Text service
private let sttService: SpeechToTextService?
/// Optional API key for authentication
private let apiKey: String?
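/// Creates a provider.
/// - Parameters:
///   - service: The Apple Intelligence completion service.
///   - ttsService: Optional Text-to-Speech service; speech RPCs fail with
///     `.unavailable` when `nil`.
///   - sttService: Optional Speech-to-Text service; transcription RPCs fail
///     with `.unavailable` when `nil`.
///   - apiKey: Optional API key; when set, requests must carry a matching
///     `authorization: Bearer <key>` metadata entry.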
public init(
service: AppleIntelligenceService,
ttsService: TextToSpeechService? = nil,
sttService: SpeechToTextService? = nil,
apiKey: String? = nil
) {
self.service = service
self.ttsService = ttsService
self.sttService = sttService
self.apiKey = apiKey
}
// MARK: - ServiceProtocol Implementation
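/// Unary completion: validates the API key, runs the prompt (and any attached
/// images) through the underlying service, and returns the full response in a
/// single message.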
public func complete(
request: GRPCCore.ServerRequest<Appleintelligence_CompletionRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_CompletionResponse> {
try validateApiKey(metadata: request.metadata)
let message = request.message
// Convert protobuf images to service format
let images = message.images.map { img in
(data: img.data, filename: img.filename.isEmpty ? nil : img.filename)
}
let (text, analyses) = try await service.complete(
prompt: message.prompt,
temperature: message.hasTemperature ? message.temperature : nil,
maxTokens: message.hasMaxTokens ? Int(message.maxTokens) : nil,
images: images
)
var response = Appleintelligence_CompletionResponse()
response.id = UUID().uuidString
response.text = text
response.finishReason = "stop"
// Include analysis results if requested
if message.includeAnalysis {
response.imageAnalyses = analyses.map { analysis in
var protoAnalysis = Appleintelligence_ImageAnalysis()
protoAnalysis.textContent = analysis.textContent
protoAnalysis.labels = analysis.labels
protoAnalysis.description_p = analysis.description
return protoAnalysis
}
}
return ServerResponse(message: response)
}
public func streamComplete(
request: GRPCCore.ServerRequest<Appleintelligence_CompletionRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_CompletionChunk> {
try validateApiKey(metadata: request.metadata)
let message = request.message
let completionId = UUID().uuidString
// Convert protobuf images to service format
let images = message.images.map { img in
(data: img.data, filename: img.filename.isEmpty ? nil : img.filename)
}
return StreamingServerResponse { writer in
let stream = await self.service.streamComplete(
prompt: message.prompt,
temperature: message.hasTemperature ? message.temperature : nil,
maxTokens: message.hasMaxTokens ? Int(message.maxTokens) : nil,
images: images
)
var lastContent = ""
var isFirstChunk = true
for try await (partialResponse, analyses) in stream {
// Calculate the delta (new text since last response)
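// e.g. partial responses "Hel", "Hello", "Hello world" produce the deltas
// "Hel", "lo", " world"; clients concatenate deltas to rebuild the full text.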
let delta: String
if partialResponse.hasPrefix(lastContent) {
delta = String(partialResponse.dropFirst(lastContent.count))
} else {
delta = partialResponse
}
lastContent = partialResponse
if !delta.isEmpty || isFirstChunk {
var chunk = Appleintelligence_CompletionChunk()
chunk.id = completionId
chunk.delta = delta
chunk.isFinal = false
// Include analyses in first chunk if requested
if isFirstChunk && message.includeAnalysis, let analyses = analyses {
chunk.imageAnalyses = analyses.map { analysis in
var protoAnalysis = Appleintelligence_ImageAnalysis()
protoAnalysis.textContent = analysis.textContent
protoAnalysis.labels = analysis.labels
protoAnalysis.description_p = analysis.description
return protoAnalysis
}
}
try await writer.write(chunk)
isFirstChunk = false
}
}
// Send final chunk
var finalChunk = Appleintelligence_CompletionChunk()
finalChunk.id = completionId
finalChunk.delta = ""
finalChunk.isFinal = true
finalChunk.finishReason = "stop"
try await writer.write(finalChunk)
return [:]
}
}
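/// Health check: reports whether the underlying model is available along with
/// its current status. This endpoint does not require an API key.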
public func health(
request: GRPCCore.ServerRequest<Appleintelligence_HealthRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_HealthResponse> {
let isHealthy = await service.isAvailable
let modelStatus = await service.getModelStatus()
var response = Appleintelligence_HealthResponse()
response.healthy = isHealthy
response.modelStatus = modelStatus
return ServerResponse(message: response)
}
// MARK: - Text-to-Speech
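/// Synthesizes speech for the supplied text and returns the encoded audio
/// (WAV or MP3) in a single response.
///
/// A sketch of the request shape a client might send; the voice identifier
/// below is illustrative only:
///
/// ```swift
/// var request = Appleintelligence_TextToSpeechRequest()
/// request.text = "Bonjour tout le monde"
/// request.voiceConfig.voiceIdentifier = "com.apple.voice.compact.fr-CA.Amelie"
/// request.voiceConfig.speakingRate = 0.5
/// request.outputFormat = .wav
/// ```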
public func textToSpeech(
request: GRPCCore.ServerRequest<Appleintelligence_TextToSpeechRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_TextToSpeechResponse> {
try validateApiKey(metadata: request.metadata)
guard let ttsService = ttsService else {
throw RPCError(code: .unavailable, message: "Text-to-Speech service not available")
}
let message = request.message
// Convert proto config to service config
var config = SpeechConfig.default
if message.hasVoiceConfig {
let voiceConfig = message.voiceConfig
config = SpeechConfig(
voiceIdentifier: voiceConfig.voiceIdentifier.isEmpty ? nil : voiceConfig.voiceIdentifier,
speakingRate: voiceConfig.hasSpeakingRate ? voiceConfig.speakingRate : 0.5,
pitchMultiplier: voiceConfig.hasPitchMultiplier ? voiceConfig.pitchMultiplier : 1.0,
volume: voiceConfig.hasVolume ? voiceConfig.volume : 1.0
)
}
// Convert proto format to service format
let outputFormat: AudioOutputFormat
switch message.outputFormat {
case .wav, .unspecified:
outputFormat = .wav
case .mp3:
outputFormat = .mp3
case .UNRECOGNIZED:
outputFormat = .wav
}
do {
let result = try await ttsService.synthesize(
text: message.text,
config: config,
outputFormat: outputFormat
)
var response = Appleintelligence_TextToSpeechResponse()
response.audioData = result.audioData
response.format = outputFormat == .wav ? .wav : .mp3
response.sampleRate = Int32(result.sampleRate)
response.channels = Int32(result.channels)
response.durationSeconds = result.durationSeconds
return ServerResponse(message: response)
} catch let error as TextToSpeechError {
throw RPCError(code: .internalError, message: error.description)
}
}
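/// Lists the synthesis voices available on the system, optionally filtered by
/// language code.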
public func listVoices(
request: GRPCCore.ServerRequest<Appleintelligence_ListVoicesRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_ListVoicesResponse> {
try validateApiKey(metadata: request.metadata)
guard let ttsService = ttsService else {
throw RPCError(code: .unavailable, message: "Text-to-Speech service not available")
}
let message = request.message
let languageCode = message.hasLanguageCode ? message.languageCode : nil
let voices = await ttsService.listVoices(languageCode: languageCode)
var response = Appleintelligence_ListVoicesResponse()
response.voices = voices.map { voice in
var protoVoice = Appleintelligence_VoiceInfo()
protoVoice.identifier = voice.identifier
protoVoice.name = voice.name
protoVoice.language = voice.language
protoVoice.isPremium = voice.isPremium
protoVoice.gender = voice.gender
return protoVoice
}
return ServerResponse(message: response)
}
// MARK: - Speech-to-Text
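/// Transcribes a complete audio clip and returns the recognized text together
/// with optional per-segment timestamps and confidences.
///
/// A sketch of the request shape a client might send; `wavBytes` is a
/// placeholder and the "audio/wav" MIME type is an assumption about what the
/// STT service accepts:
///
/// ```swift
/// var request = Appleintelligence_TranscribeRequest()
/// request.audio.data = wavBytes
/// request.audio.mimeType = "audio/wav"
/// request.config.languageCode = "fr-CA"
/// request.config.enableTimestamps = true
/// ```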
public func transcribe(
request: GRPCCore.ServerRequest<Appleintelligence_TranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.ServerResponse<Appleintelligence_TranscribeResponse> {
try validateApiKey(metadata: request.metadata)
guard let sttService = sttService else {
throw RPCError(code: .unavailable, message: "Speech-to-Text service not available")
}
let message = request.message
guard message.hasAudio else {
throw RPCError(code: .invalidArgument, message: "Audio data is required")
}
// Convert proto config to service config
var config = TranscriptionConfig.default
if message.hasConfig {
let protoConfig = message.config
config = TranscriptionConfig(
languageCode: protoConfig.hasLanguageCode ? protoConfig.languageCode : nil,
enablePunctuation: protoConfig.hasEnablePunctuation ? protoConfig.enablePunctuation : true,
enableTimestamps: protoConfig.hasEnableTimestamps ? protoConfig.enableTimestamps : false
)
}
do {
let result = try await sttService.transcribe(
audioData: message.audio.data,
mimeType: message.audio.mimeType,
config: config
)
var response = Appleintelligence_TranscribeResponse()
response.text = result.text
response.detectedLanguage = result.detectedLanguage
response.confidence = result.confidence
response.segments = result.segments.map { segment in
var protoSegment = Appleintelligence_TranscriptionSegment()
protoSegment.text = segment.text
protoSegment.startTime = segment.startTime
protoSegment.endTime = segment.endTime
protoSegment.confidence = segment.confidence
return protoSegment
}
return ServerResponse(message: response)
} catch let error as SpeechToTextError {
throw RPCError(code: .internalError, message: error.description)
}
}
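/// Bidirectional streaming transcription. The client sends a `config` message
/// first, then a sequence of `audioChunk` messages; the server streams back
/// partial transcripts as recognition progresses and the final text and
/// segments once it settles.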
public func streamTranscribe(
request: GRPCCore.StreamingServerRequest<Appleintelligence_StreamingTranscribeRequest>,
context: GRPCCore.ServerContext
) async throws -> GRPCCore.StreamingServerResponse<Appleintelligence_StreamingTranscribeResponse> {
try validateApiKey(metadata: request.metadata)
guard let sttService = sttService else {
throw RPCError(code: .unavailable, message: "Speech-to-Text service not available")
}
return StreamingServerResponse { writer in
var config = TranscriptionConfig.default
// Task that forwards transcription updates to the response writer, kept so
// the handler can await it before completing the RPC.
var transcriptionTask: Task<Void, Never>?
// Process incoming stream
for try await message in request.messages {
switch message.request {
case .config(let protoConfig):
// First message should be config
config = TranscriptionConfig(
languageCode: protoConfig.hasLanguageCode ? protoConfig.languageCode : nil,
enablePunctuation: protoConfig.hasEnablePunctuation ? protoConfig.enablePunctuation : true,
enableTimestamps: protoConfig.hasEnableTimestamps ? protoConfig.enableTimestamps : false
)
// Start streaming transcription and forward updates to the client from a
// child task while this loop keeps feeding audio chunks
let stream = await sttService.streamTranscribe(config: config)
transcriptionTask = Task {
do {
for try await update in stream {
var response = Appleintelligence_StreamingTranscribeResponse()
response.partialText = update.partialText
response.isFinal = update.isFinal
if let finalText = update.finalText {
response.finalText = finalText
}
response.segments = update.segments.map { segment in
var protoSegment = Appleintelligence_TranscriptionSegment()
protoSegment.text = segment.text
protoSegment.startTime = segment.startTime
protoSegment.endTime = segment.endTime
protoSegment.confidence = segment.confidence
return protoSegment
}
try await writer.write(response)
}
} catch {
// Transcription stream ended or failed; updates already written to the
// client remain valid
}
}
case .audioChunk(let chunk):
// Feed audio chunk to service
try await sttService.feedAudioChunk(chunk)
case .none:
break
}
}
// End the streaming session, then wait for the forwarding task to flush any
// remaining updates before the RPC completes
await sttService.endStreamingSession()
await transcriptionTask?.value
return [:]
}
}
// MARK: - Private Helpers
/// Validate API key if configured
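///
/// Clients authenticate by sending `authorization: Bearer <api-key>` in the
/// request metadata; requests with a missing or mismatched key are rejected
/// with `.unauthenticated`.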
private func validateApiKey(metadata: Metadata) throws {
guard let expectedKey = apiKey else {
return // No API key required
}
// Look for the Authorization header in metadata; the values view only offers
// Sequence operations, so take the first element via first(where:)
let authValues = metadata["authorization"]
guard let authHeader = authValues.first(where: { _ in true }),
case .string(let authString) = authHeader,
authString.hasPrefix("Bearer ") else {
throw RPCError(code: .unauthenticated, message: "Missing or invalid Authorization header")
}
let providedKey = String(authString.dropFirst("Bearer ".count))
guard providedKey == expectedKey else {
throw RPCError(code: .unauthenticated, message: "Invalid API key")
}
}
}