"""Voice transcription helpers for co-creation sessions.""" from __future__ import annotations from dataclasses import dataclass from fastapi import HTTPException from sqlalchemy.ext.asyncio import AsyncSession from app.core.config import settings from app.services.provider_router import transcribe_audio @dataclass(frozen=True) class VoiceTranscriptionResult: """Normalized transcription result for one uploaded voice turn.""" transcript_text: str confidence: float | None = None provider: str = "demo" def _resolve_transcript_hint(transcript_hint: str | None) -> str | None: normalized = (transcript_hint or "").strip() return normalized or None async def transcribe_voice_audio( *, audio_bytes: bytes, file_name: str, mime_type: str | None, transcript_hint: str | None = None, db: AsyncSession | None = None, user_id: str | None = None, ) -> VoiceTranscriptionResult: """Transcribe one uploaded audio turn using configured ASR providers.""" mode = (settings.voice_transcription_mode or "provider").strip().lower() if mode == "disabled": raise HTTPException( status_code=503, detail="当前环境已禁用语音转写,请先使用文本共创模式。", ) hint = _resolve_transcript_hint(transcript_hint) provider_name = "openai_asr" if mode == "openai" else mode strategy_providers = None if mode == "provider" else [provider_name] result = await transcribe_audio( audio_bytes=audio_bytes, file_name=file_name, mime_type=mime_type, transcript_hint=hint, language=settings.voice_transcription_language, provider_names=strategy_providers, db=db, user_id=user_id, ) return VoiceTranscriptionResult( transcript_text=result.transcript_text, confidence=result.confidence, provider=result.provider, )