Files
dreamweaver/backend/app/services/voice_transcription_service.py

66 lines
1.9 KiB
Python

"""Voice transcription helpers for co-creation sessions."""
from __future__ import annotations
from dataclasses import dataclass
from fastapi import HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.services.provider_router import transcribe_audio
@dataclass(frozen=True)
class VoiceTranscriptionResult:
    """Immutable value object describing the outcome of transcribing one voice turn.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Text recognized from the uploaded audio.
    transcript_text: str
    # Confidence reported for the transcript; ``None`` when none is available.
    confidence: float | None = None
    # Name of the provider that produced the transcript; defaults to "demo".
    provider: str = "demo"
def _resolve_transcript_hint(transcript_hint: str | None) -> str | None:
normalized = (transcript_hint or "").strip()
return normalized or None
async def transcribe_voice_audio(
    *,
    audio_bytes: bytes,
    file_name: str,
    mime_type: str | None,
    transcript_hint: str | None = None,
    db: AsyncSession | None = None,
    user_id: str | None = None,
) -> VoiceTranscriptionResult:
    """Transcribe one uploaded audio turn using configured ASR providers.

    The behavior is driven by ``settings.voice_transcription_mode``
    (case-insensitive, surrounding whitespace ignored):

    * ``"disabled"`` - transcription is refused with HTTP 503.
    * ``"provider"`` (also used when the setting is unset or blank) -
      ``transcribe_audio`` is called with ``provider_names=None``.
    * ``"openai"`` - ``transcribe_audio`` is restricted to ``["openai_asr"]``.
    * any other value - passed through as the single provider name.

    Args:
        audio_bytes: Raw bytes of the uploaded audio.
        file_name: Name of the uploaded file, forwarded to the provider router.
        mime_type: MIME type of the upload, if known.
        transcript_hint: Optional hint text; blank hints are forwarded as ``None``.
        db: Optional async database session, forwarded to the provider router.
        user_id: Optional user identifier, forwarded to the provider router.

    Returns:
        A ``VoiceTranscriptionResult`` copied from the router's result
        (``transcript_text``, ``confidence``, ``provider``).

    Raises:
        HTTPException: With status 503 when transcription is disabled.
    """
    # Normalize first, then fall back: a whitespace-only setting must behave
    # like an unset one instead of becoming the provider name "".
    mode = (settings.voice_transcription_mode or "").strip().lower() or "provider"
    if mode == "disabled":
        raise HTTPException(
            status_code=503,
            detail="当前环境已禁用语音转写,请先使用文本共创模式。",
        )

    hint = _resolve_transcript_hint(transcript_hint)

    # "openai" is a shorthand for the "openai_asr" provider; other modes are
    # used as the provider name verbatim.
    provider_name = "openai_asr" if mode == "openai" else mode
    # NOTE(review): provider_names=None presumably lets the router apply its
    # own default provider strategy - confirm against provider_router.
    strategy_providers = None if mode == "provider" else [provider_name]

    result = await transcribe_audio(
        audio_bytes=audio_bytes,
        file_name=file_name,
        mime_type=mime_type,
        transcript_hint=hint,
        language=settings.voice_transcription_language,
        provider_names=strategy_providers,
        db=db,
        user_id=user_id,
    )
    return VoiceTranscriptionResult(
        transcript_text=result.transcript_text,
        confidence=result.confidence,
        provider=result.provider,
    )