feat: improve voice studio alpha recovery flow

This commit is contained in:
2026-04-19 23:25:41 +08:00
parent 46d6201529
commit 4ecf0c09c0
9 changed files with 657 additions and 14 deletions

View File

@@ -6,9 +6,10 @@ from datetime import datetime, timezone
from typing import Any
from fastapi import HTTPException
from sqlalchemy import desc, select
from sqlalchemy import case, desc, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.logging import get_logger
from app.db.models import VoiceSession, VoiceSessionEvent, VoiceTurn
from app.schemas.voice_session_schemas import (
@@ -134,6 +135,7 @@ def _session_to_summary(
session_audio_exists(latest_turn.assistant_audio_path) if latest_turn else False
),
last_turn_status=latest_turn.status if latest_turn else None,
transcription_mode_hint=settings.voice_transcription_mode,
can_continue=_session_can_continue(session),
can_finalize=_session_can_finalize(session),
last_error=session.last_error,
@@ -602,17 +604,29 @@ async def list_voice_sessions_service(
user_id: str,
db: AsyncSession,
*,
limit: int = 8,
limit: int | None = None,
active_only: bool = False,
active_first: bool = False,
) -> list[VoiceSessionSummaryResponse]:
query = (
select(VoiceSession)
.where(VoiceSession.user_id == user_id)
.order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
.limit(limit)
)
resolved_limit = limit or settings.voice_session_default_list_limit
resolved_limit = max(1, min(resolved_limit, settings.voice_session_max_list_limit))
query = select(VoiceSession).where(VoiceSession.user_id == user_id)
if active_only:
query = query.where(VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES))
if active_first:
query = query.order_by(
desc(
case(
(VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES), 1),
else_=0,
)
),
desc(VoiceSession.updated_at),
desc(VoiceSession.created_at),
)
else:
query = query.order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
query = query.limit(resolved_limit)
sessions = (await db.execute(query)).scalars().all()
summaries: list[VoiceSessionSummaryResponse] = []
@@ -628,6 +642,30 @@ async def list_voice_sessions_service(
return summaries
async def get_latest_active_voice_session_service(
    user_id: str,
    db: AsyncSession,
) -> VoiceSessionSummaryResponse | None:
    """Return a summary of the user's most recently updated continuable session.

    A session qualifies when it belongs to ``user_id`` and its status is one
    of ``CONTINUABLE_SESSION_STATUSES``. Candidates are ordered by
    ``updated_at`` and then ``created_at``, both descending, and only the
    first row is fetched.

    Returns:
        The summary built by ``_session_to_summary`` (with the session's
        latest turn and ``current_turn_index`` as the turn total), or
        ``None`` when no continuable session exists for the user.
    """
    owned_by_user = VoiceSession.user_id == user_id
    is_continuable = VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES)
    newest_first = (desc(VoiceSession.updated_at), desc(VoiceSession.created_at))

    statement = (
        select(VoiceSession)
        .where(owned_by_user, is_continuable)
        .order_by(*newest_first)
        .limit(1)
    )
    result = await db.execute(statement)
    active_session = result.scalar_one_or_none()

    if active_session is not None:
        newest_turn = await _get_latest_turn(db, session_id=active_session.id)
        return _session_to_summary(
            active_session,
            latest_turn=newest_turn,
            total_turns=active_session.current_turn_index,
        )
    return None
async def create_voice_session_service(
request: VoiceSessionCreateRequest,
user_id: str,
@@ -766,6 +804,13 @@ async def create_voice_turn_from_upload_service(
status_code=409,
detail="Voice session is not ready for another turn.",
)
if not audio_bytes:
raise HTTPException(status_code=400, detail="上传音频为空,请重新录音后再试。")
if len(audio_bytes) > settings.voice_turn_max_upload_bytes:
raise HTTPException(
status_code=413,
detail="上传音频过大,请缩短单轮录音时长后再试。",
)
next_turn_index = session.current_turn_index + 1
user_audio_path = write_uploaded_user_audio(
session_id=session.id,
@@ -805,6 +850,91 @@ async def create_voice_turn_from_upload_service(
)
async def retry_voice_turn_service(
    session_id: str,
    turn_id: str,
    user_id: str,
    db: AsyncSession,
) -> VoiceTurnAcceptedResponse:
    """Retry a failed voice turn by resubmitting its stored transcript as text.

    The turn is looked up through ``_get_owned_turn`` using the session,
    turn and user ids. The retry itself is delegated to
    ``create_voice_turn_from_text_service`` with the turn's existing
    transcript and recorded user-audio duration.

    Raises:
        HTTPException: 409 when the turn's status is not ``"failed"``, or
            when the turn has no user transcript to resubmit.
    """
    failed_turn = await _get_owned_turn(
        db,
        session_id=session_id,
        turn_id=turn_id,
        user_id=user_id,
    )

    if failed_turn.status != "failed":
        raise HTTPException(status_code=409, detail="Only failed turns can be retried.")

    transcript = failed_turn.user_transcript
    if not transcript:
        raise HTTPException(status_code=409, detail="This turn has no transcript to retry.")

    # Reuse the text-fallback path so the retry goes through the normal turn flow.
    fallback_request = VoiceTurnCreateFallbackRequest(
        transcript_text=transcript,
        duration_ms=failed_turn.user_audio_duration_ms,
    )
    return await create_voice_turn_from_text_service(
        session_id,
        fallback_request,
        user_id,
        db,
    )
async def retry_voice_turn_audio_service(
    session_id: str,
    turn_id: str,
    user_id: str,
    db: AsyncSession,
) -> VoiceTurnSummaryResponse:
    """Regenerate the missing assistant audio for one voice turn.

    The turn is looked up through ``_get_owned_turn``. Its assistant text is
    synthesized with ``text_to_speech``, written via ``write_session_audio``
    and the resulting path is stored on the turn. A turn whose status is
    ``"narrative_ready"`` is promoted to ``"audio_ready"``; other statuses
    are left as they are. A session event is recorded for both the success
    and the failure outcome.

    Returns:
        The refreshed turn converted with ``_turn_to_summary``.

    Raises:
        HTTPException: 409 when the turn has no assistant text, or when
            assistant audio already exists for it; 503 when synthesis,
            writing the audio, or persisting the turn fails.
    """
    turn = await _get_owned_turn(
        db,
        session_id=session_id,
        turn_id=turn_id,
        user_id=user_id,
    )
    if not turn.assistant_text:
        raise HTTPException(status_code=409, detail="This turn has no assistant text to speak.")
    if session_audio_exists(turn.assistant_audio_path):
        raise HTTPException(status_code=409, detail="Assistant audio already exists for this turn.")

    # Capture the identifiers up front: a rollback expires ORM-loaded
    # attributes, and reading an expired attribute through an AsyncSession
    # would require implicit I/O inside the error handler.
    owning_session_id = turn.session_id
    owning_turn_id = turn.id

    try:
        audio_bytes = await text_to_speech(
            turn.assistant_text,
            db=db,
            user_id=user_id,
        )
        saved_path = write_session_audio(
            build_turn_assistant_audio_path(turn.session_id, turn.turn_index),
            audio_bytes,
        )
        turn.assistant_audio_path = saved_path
        # The duration of the regenerated audio is not measured here.
        turn.assistant_audio_duration_ms = None
        if turn.status == "narrative_ready":
            turn.status = "audio_ready"
        await db.commit()
        await db.refresh(turn)
    except Exception as exc:
        # Reset the transaction before reusing the session: after a failed
        # flush/commit the session cannot accept new work until rolled back,
        # and uncommitted changes to the turn must not be persisted together
        # with the failure event.
        await db.rollback()
        await _record_session_event(
            db,
            session_id=owning_session_id,
            turn_id=owning_turn_id,
            event_type="assistant_audio_retry_failed",
            status="failed",
            message="Assistant audio retry failed.",
            metadata={"error": str(exc)},
        )
        raise HTTPException(status_code=503, detail="语音补发失败,请稍后再试。") from exc

    # Recorded outside the try block so that a problem while logging the
    # success event is not reported as a failed audio retry after the audio
    # has already been written and committed.
    await _record_session_event(
        db,
        session_id=owning_session_id,
        turn_id=owning_turn_id,
        event_type="assistant_audio_retry_succeeded",
        status="succeeded",
        message="Assistant audio regenerated for one voice turn.",
        metadata={"audio_path": saved_path},
    )
    return _turn_to_summary(turn)
async def get_voice_turn_service(
session_id: str,
turn_id: str,