feat: add voice studio prototype flow

This commit is contained in:
2026-04-19 23:10:16 +08:00
parent f106f740dd
commit 46d6201529
14 changed files with 1745 additions and 212 deletions

View File

@@ -0,0 +1,134 @@
"""Voice transcription helpers for co-creation sessions."""
from __future__ import annotations
from dataclasses import dataclass
from io import BytesIO
from fastapi import HTTPException
from openai import AsyncOpenAI
from app.core.config import settings
from app.core.logging import get_logger
logger = get_logger(__name__)
@dataclass(frozen=True)
class VoiceTranscriptionResult:
"""Normalized transcription result for one uploaded voice turn."""
transcript_text: str
confidence: float | None = None
provider: str = "demo"
def _normalize_transcript(transcript_text: str) -> str:
return transcript_text.strip()
async def _transcribe_demo(
    *,
    audio_bytes: bytes,
    mime_type: str | None,
    transcript_hint: str | None,
) -> VoiceTranscriptionResult:
    """Produce a transcript without calling any speech-to-text backend.

    The transcript is resolved in this order:

    1. A non-blank ``transcript_hint`` (whitespace-trimmed) is used as-is.
    2. Otherwise, when ``mime_type`` starts with ``"text/"``, the payload is
       decoded as UTF-8 (undecodable bytes are dropped) and trimmed.
    3. If neither yields text, the request is rejected.

    Args:
        audio_bytes: Raw uploaded payload.
        mime_type: Content type reported for the upload, if any.
        transcript_hint: Caller-supplied transcript text, if any.

    Returns:
        A result with ``confidence=1.0`` and ``provider="demo"``.

    Raises:
        HTTPException: 503 when no transcript text could be derived.
    """
    demo_text = _normalize_transcript(transcript_hint or "")

    # Only fall back to decoding the payload when the hint gave us nothing.
    if not demo_text and mime_type and mime_type.startswith("text/"):
        decoded_payload = audio_bytes.decode("utf-8", errors="ignore")
        demo_text = _normalize_transcript(decoded_payload)

    if not demo_text:
        raise HTTPException(
            status_code=503,
            detail=(
                "当前环境未配置真实语音转写,请先使用文本共创模式,"
                "或在开发模式下提供 transcript_hint。"
            ),
        )

    return VoiceTranscriptionResult(
        transcript_text=demo_text,
        confidence=1.0,
        provider="demo",
    )
async def _transcribe_openai(
    *,
    audio_bytes: bytes,
    file_name: str,
    mime_type: str | None,
    transcript_hint: str | None,
) -> VoiceTranscriptionResult:
    """Transcribe one audio upload through the OpenAI transcription API.

    Args:
        audio_bytes: Raw uploaded audio payload.
        file_name: File name attached to the in-memory upload buffer.
        mime_type: Content type reported for the upload. Accepted for
            signature parity with the other transcribers; it is not
            forwarded to the API by this function.
        transcript_hint: Optional text sent as the request ``prompt``.
            Blank or whitespace-only hints are ignored.

    Returns:
        A result with ``provider="openai"`` and ``confidence=None``.

    Raises:
        HTTPException: 503 when ``settings.openai_api_key`` is not set or the
            transcription request fails; 502 when the returned transcript is
            empty after trimming.
    """
    if not settings.openai_api_key:
        raise HTTPException(
            status_code=503,
            detail="OPENAI_API_KEY 未配置,无法使用 OpenAI 语音转写。",
        )

    # Wrap the payload in a named in-memory file so the upload carries the
    # original file name.
    audio_file = BytesIO(audio_bytes)
    audio_file.name = file_name

    request_kwargs: dict[str, object] = {
        "model": settings.voice_transcription_model,
        "file": audio_file,
    }
    # Only send optional parameters that are actually set, instead of passing
    # explicit None / empty values through to the SDK.
    language = settings.voice_transcription_language
    if language:
        request_kwargs["language"] = language
    # Normalize the hint the same way the demo transcriber does, so a
    # whitespace-only hint is treated as "no hint".
    prompt = _normalize_transcript(transcript_hint or "")
    if prompt:
        request_kwargs["prompt"] = prompt

    client = AsyncOpenAI(api_key=settings.openai_api_key)
    try:
        # A client is created per call; the context manager closes it (and its
        # underlying HTTP resources) as soon as the request finishes rather
        # than waiting for garbage collection.
        async with client:
            response = await client.audio.transcriptions.create(**request_kwargs)
    except Exception as exc:
        # Service boundary: any provider/transport failure is logged and
        # surfaced to the caller as "temporarily unavailable".
        logger.warning("voice_transcription_openai_failed", error=str(exc))
        raise HTTPException(
            status_code=503,
            detail="语音转写服务暂时不可用,请稍后重试。",
        ) from exc

    # Tolerate a response without a ``text`` attribute or with a None value.
    transcript_text = _normalize_transcript(getattr(response, "text", "") or "")
    if not transcript_text:
        raise HTTPException(status_code=502, detail="语音转写结果为空,请重试。")

    return VoiceTranscriptionResult(
        transcript_text=transcript_text,
        confidence=None,
        provider="openai",
    )
async def transcribe_voice_audio(
    *,
    audio_bytes: bytes,
    file_name: str,
    mime_type: str | None,
    transcript_hint: str | None = None,
) -> VoiceTranscriptionResult:
    """Route one uploaded voice turn to the transcriber selected in settings.

    ``settings.voice_transcription_mode`` is trimmed and lower-cased before
    comparison; a missing or empty value is treated as ``"demo"``.

    * ``"openai"``: delegate to ``_transcribe_openai``.
    * ``"disabled"``: reject the request.
    * anything else (including ``"demo"``): delegate to ``_transcribe_demo``.

    Args:
        audio_bytes: Raw uploaded payload.
        file_name: File name of the upload (used by the OpenAI path only).
        mime_type: Content type reported for the upload, if any.
        transcript_hint: Optional caller-supplied transcript text.

    Returns:
        The result produced by the selected transcriber.

    Raises:
        HTTPException: 503 when the mode is ``"disabled"``; the delegated
            transcribers may raise their own ``HTTPException`` as well.
    """
    configured_mode = settings.voice_transcription_mode or "demo"
    selected = configured_mode.strip().lower()

    if selected == "openai":
        return await _transcribe_openai(
            audio_bytes=audio_bytes,
            file_name=file_name,
            mime_type=mime_type,
            transcript_hint=transcript_hint,
        )

    # Every mode other than "openai" and "disabled" falls back to demo.
    if selected != "disabled":
        return await _transcribe_demo(
            audio_bytes=audio_bytes,
            mime_type=mime_type,
            transcript_hint=transcript_hint,
        )

    raise HTTPException(
        status_code=503,
        detail="当前环境已禁用语音转写,请先使用文本共创模式。",
    )