feat: improve voice studio alpha recovery flow

2026-04-19 23:25:41 +08:00
parent 46d6201529
commit 4ecf0c09c0
9 changed files with 657 additions and 14 deletions
--- a/backend/app/services/voice_session_service.py
+++ b/backend/app/services/voice_session_service.py
@@ -6,9 +6,10 @@ from datetime import datetime, timezone
 from typing import Any

 from fastapi import HTTPException
-from sqlalchemy import desc, select
+from sqlalchemy import case, desc, select
 from sqlalchemy.ext.asyncio import AsyncSession

+from app.core.config import settings
 from app.core.logging import get_logger
 from app.db.models import VoiceSession, VoiceSessionEvent, VoiceTurn
 from app.schemas.voice_session_schemas import (
@@ -134,6 +135,7 @@ def _session_to_summary(
            session_audio_exists(latest_turn.assistant_audio_path) if latest_turn else False
        ),
        last_turn_status=latest_turn.status if latest_turn else None,
+        transcription_mode_hint=settings.voice_transcription_mode,
        can_continue=_session_can_continue(session),
        can_finalize=_session_can_finalize(session),
        last_error=session.last_error,
@@ -602,17 +604,29 @@ async def list_voice_sessions_service(
    user_id: str,
    db: AsyncSession,
    *,
-    limit: int = 8,
+    limit: int | None = None,
    active_only: bool = False,
+    active_first: bool = False,
 ) -> list[VoiceSessionSummaryResponse]:
-    query = (
-        select(VoiceSession)
-        .where(VoiceSession.user_id == user_id)
-        .order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
-        .limit(limit)
-    )
+    resolved_limit = limit or settings.voice_session_default_list_limit
+    resolved_limit = max(1, min(resolved_limit, settings.voice_session_max_list_limit))
+    query = select(VoiceSession).where(VoiceSession.user_id == user_id)
    if active_only:
        query = query.where(VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES))
+    if active_first:
+        query = query.order_by(
+            desc(
+                case(
+                    (VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES), 1),
+                    else_=0,
+                )
+            ),
+            desc(VoiceSession.updated_at),
+            desc(VoiceSession.created_at),
+        )
+    else:
+        query = query.order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
+    query = query.limit(resolved_limit)

    sessions = (await db.execute(query)).scalars().all()
    summaries: list[VoiceSessionSummaryResponse] = []
@@ -628,6 +642,30 @@ async def list_voice_sessions_service(
    return summaries


+async def get_latest_active_voice_session_service(
+    user_id: str,
+    db: AsyncSession,
+) -> VoiceSessionSummaryResponse | None:
+    """Summarize the user's most recently updated continuable voice session.
+
+    Only sessions owned by ``user_id`` whose status is one of
+    ``CONTINUABLE_SESSION_STATUSES`` are considered. Candidates are ordered by
+    ``updated_at`` and then ``created_at``, newest first, and only the first
+    row is read.
+
+    Returns:
+        The summary built by ``_session_to_summary`` for that session, or
+        ``None`` when the user has no continuable session.
+    """
+    newest_first = (desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
+    statement = (
+        select(VoiceSession)
+        .where(VoiceSession.user_id == user_id)
+        .where(VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES))
+        .order_by(*newest_first)
+        .limit(1)
+    )
+    result = await db.execute(statement)
+    active_session = result.scalar_one_or_none()
+    if active_session is None:
+        return None
+
+    # The summary also reports on the session's latest turn, so fetch it first.
+    most_recent_turn = await _get_latest_turn(db, session_id=active_session.id)
+    return _session_to_summary(
+        active_session,
+        latest_turn=most_recent_turn,
+        total_turns=active_session.current_turn_index,
+    )
+
+
 async def create_voice_session_service(
    request: VoiceSessionCreateRequest,
    user_id: str,
@@ -766,6 +804,13 @@ async def create_voice_turn_from_upload_service(
            status_code=409,
            detail="Voice session is not ready for another turn.",
        )
+    if not audio_bytes:
+        raise HTTPException(status_code=400, detail="上传音频为空，请重新录音后再试。")
+    if len(audio_bytes) > settings.voice_turn_max_upload_bytes:
+        raise HTTPException(
+            status_code=413,
+            detail="上传音频过大，请缩短单轮录音时长后再试。",
+        )
    next_turn_index = session.current_turn_index + 1
    user_audio_path = write_uploaded_user_audio(
        session_id=session.id,
@@ -805,6 +850,91 @@ async def create_voice_turn_from_upload_service(
    )


+async def retry_voice_turn_service(
+    session_id: str,
+    turn_id: str,
+    user_id: str,
+    db: AsyncSession,
+) -> VoiceTurnAcceptedResponse:
+    """Resubmit the stored transcript of a failed turn through the text-turn path.
+
+    The turn is looked up with ``_get_owned_turn`` and must currently be in the
+    ``"failed"`` status with a non-empty ``user_transcript``. Its transcript and
+    recorded audio duration are then handed to
+    ``create_voice_turn_from_text_service`` for the same session.
+
+    Returns:
+        Whatever ``create_voice_turn_from_text_service`` returns for the
+        resubmitted transcript.
+
+    Raises:
+        HTTPException: 409 when the turn is not failed, or when it has no
+            transcript to resubmit.
+    """
+    failed_turn = await _get_owned_turn(
+        db,
+        session_id=session_id,
+        turn_id=turn_id,
+        user_id=user_id,
+    )
+
+    # Guard clauses: only a failed turn that still carries a transcript qualifies.
+    if failed_turn.status != "failed":
+        raise HTTPException(status_code=409, detail="Only failed turns can be retried.")
+    transcript = failed_turn.user_transcript
+    if not transcript:
+        raise HTTPException(status_code=409, detail="This turn has no transcript to retry.")
+
+    retry_request = VoiceTurnCreateFallbackRequest(
+        transcript_text=transcript,
+        duration_ms=failed_turn.user_audio_duration_ms,
+    )
+    return await create_voice_turn_from_text_service(
+        session_id,
+        retry_request,
+        user_id,
+        db,
+    )
+
+
+async def retry_voice_turn_audio_service(
+    session_id: str,
+    turn_id: str,
+    user_id: str,
+    db: AsyncSession,
+) -> VoiceTurnSummaryResponse:
+    """Regenerate the assistant audio for a turn that has text but no audio file.
+
+    The turn's ``assistant_text`` is synthesized with ``text_to_speech``, the
+    audio is written to the turn's assistant-audio path, and that path is
+    stored on the turn and committed. A turn in ``"narrative_ready"`` is
+    promoted to ``"audio_ready"``; any other status is left unchanged. A
+    session event is recorded for both the success and the failure outcome.
+
+    Returns:
+        The refreshed turn converted with ``_turn_to_summary``.
+
+    Raises:
+        HTTPException: 409 when the turn has no assistant text or its audio
+            already exists; 503 when synthesis, the file write or the commit
+            fails.
+    """
+    turn = await _get_owned_turn(
+        db,
+        session_id=session_id,
+        turn_id=turn_id,
+        user_id=user_id,
+    )
+    if not turn.assistant_text:
+        raise HTTPException(status_code=409, detail="This turn has no assistant text to speak.")
+    if session_audio_exists(turn.assistant_audio_path):
+        raise HTTPException(status_code=409, detail="Assistant audio already exists for this turn.")
+
+    # Snapshot the identifiers up front: the failure path rolls the session
+    # back, which expires ORM attributes, and the event still needs these ids.
+    owning_session_id = turn.session_id
+    failed_or_retried_turn_id = turn.id
+
+    try:
+        audio_bytes = await text_to_speech(
+            turn.assistant_text,
+            db=db,
+            user_id=user_id,
+        )
+        saved_path = write_session_audio(
+            build_turn_assistant_audio_path(turn.session_id, turn.turn_index),
+            audio_bytes,
+        )
+        turn.assistant_audio_path = saved_path
+        # The duration of the regenerated audio is not measured here.
+        turn.assistant_audio_duration_ms = None
+        if turn.status == "narrative_ready":
+            turn.status = "audio_ready"
+        await db.commit()
+        await db.refresh(turn)
+    except Exception as exc:
+        # Drop the half-applied turn changes and leave any failed transaction
+        # before the session is reused to record the failure event.
+        await db.rollback()
+        await _record_session_event(
+            db,
+            session_id=owning_session_id,
+            turn_id=failed_or_retried_turn_id,
+            event_type="assistant_audio_retry_failed",
+            status="failed",
+            message="Assistant audio retry failed.",
+            metadata={"error": str(exc)},
+        )
+        raise HTTPException(status_code=503, detail="语音补发失败，请稍后再试。") from exc
+
+    # Recorded outside the try block on purpose: once the audio is written and
+    # committed, a problem while writing this event must not be logged as a
+    # failed retry nor turned into a "try again later" 503.
+    await _record_session_event(
+        db,
+        session_id=owning_session_id,
+        turn_id=failed_or_retried_turn_id,
+        event_type="assistant_audio_retry_succeeded",
+        status="succeeded",
+        message="Assistant audio regenerated for one voice turn.",
+        metadata={"audio_path": saved_path},
+    )
+    return _turn_to_summary(turn)
+
+
 async def get_voice_turn_service(
    session_id: str,
    turn_id: str,