66 lines
1.9 KiB
Python
66 lines
1.9 KiB
Python
"""Voice transcription helpers for co-creation sessions."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from fastapi import HTTPException
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.core.config import settings
|
|
from app.services.provider_router import transcribe_audio
|
|
|
|
|
|
@dataclass(frozen=True)
class VoiceTranscriptionResult:
    """Normalized transcription result for one uploaded voice turn.

    Instances are immutable (``frozen=True``).

    Attributes:
        transcript_text: The transcribed text.
        confidence: Confidence value for the transcript, or ``None`` when
            no value is available.
        provider: Name of the provider that produced the transcript;
            defaults to ``"demo"``.
    """

    transcript_text: str
    confidence: float | None = None
    provider: str = "demo"
|
|
|
|
|
|
def _resolve_transcript_hint(transcript_hint: str | None) -> str | None:
|
|
normalized = (transcript_hint or "").strip()
|
|
return normalized or None
|
|
|
|
|
|
async def transcribe_voice_audio(
    *,
    audio_bytes: bytes,
    file_name: str,
    mime_type: str | None,
    transcript_hint: str | None = None,
    db: AsyncSession | None = None,
    user_id: str | None = None,
) -> VoiceTranscriptionResult:
    """Transcribe one uploaded audio turn using configured ASR providers.

    The transcription mode is read from ``settings.voice_transcription_mode``
    (case-insensitive, surrounding whitespace ignored). An unset, empty, or
    whitespace-only value falls back to ``"provider"``.

    Args:
        audio_bytes: Raw bytes of the uploaded audio.
        file_name: Name of the uploaded file; forwarded to the router.
        mime_type: MIME type of the upload, or ``None``; forwarded as-is.
        transcript_hint: Optional hint text. Blank or whitespace-only
            values are forwarded as ``None``.
        db: Optional async database session forwarded to the router.
        user_id: Optional user identifier forwarded to the router.

    Returns:
        A ``VoiceTranscriptionResult`` built from the ``transcript_text``,
        ``confidence`` and ``provider`` of the router's result.

    Raises:
        HTTPException: With status 503 when the mode is ``"disabled"``.
    """
    # Normalize first, then fall back: a whitespace-only setting must resolve
    # to the default mode instead of becoming an empty provider name.
    configured_mode = settings.voice_transcription_mode or ""
    mode = configured_mode.strip().lower() or "provider"

    if mode == "disabled":
        raise HTTPException(
            status_code=503,
            detail="当前环境已禁用语音转写,请先使用文本共创模式。",
        )

    hint = _resolve_transcript_hint(transcript_hint)

    # "openai" is shorthand for the "openai_asr" provider name; any other
    # explicit mode is used as the provider name verbatim.
    provider_name = "openai_asr" if mode == "openai" else mode
    # In "provider" mode no explicit provider list is passed (None) --
    # presumably the router then applies its own selection; confirm there.
    strategy_providers = None if mode == "provider" else [provider_name]

    result = await transcribe_audio(
        audio_bytes=audio_bytes,
        file_name=file_name,
        mime_type=mime_type,
        transcript_hint=hint,
        language=settings.voice_transcription_language,
        provider_names=strategy_providers,
        db=db,
        user_id=user_id,
    )

    return VoiceTranscriptionResult(
        transcript_text=result.transcript_text,
        confidence=result.confidence,
        provider=result.provider,
    )
|