feat: improve voice studio alpha recovery flow

2026-04-19 23:25:41 +08:00
parent 46d6201529
commit 4ecf0c09c0
9 changed files with 657 additions and 14 deletions
--- a/backend/app/api/voice_sessions.py
+++ b/backend/app/api/voice_sessions.py
@@ -12,6 +12,7 @@ from fastapi import (
 )
 from sqlalchemy.ext.asyncio import AsyncSession

+from app.core.config import settings
 from app.core.deps import require_user
 from app.core.rate_limiter import check_rate_limit
 from app.db.database import get_db
@@ -34,11 +35,14 @@ from app.services.voice_session_service import (
    create_voice_turn_from_text_service,
    create_voice_turn_from_upload_service,
    finalize_voice_session_service,
+    get_latest_active_voice_session_service,
    get_voice_session_detail_service,
    get_voice_turn_audio_service,
    get_voice_turn_service,
    get_voice_turn_user_audio_service,
    list_voice_sessions_service,
+    retry_voice_turn_audio_service,
+    retry_voice_turn_service,
 )

 router = APIRouter()
@@ -68,8 +72,13 @@ async def create_voice_session(

@router.get("/voice-sessions", response_model=list[VoiceSessionSummaryResponse])
 async def list_voice_sessions(
-    limit: int = Query(default=8, ge=1, le=20),
+    limit: int = Query(
+        default=settings.voice_session_default_list_limit,
+        ge=1,
+        le=settings.voice_session_max_list_limit,
+    ),
    active_only: bool = Query(default=False),
+    active_first: bool = Query(default=True),
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
 ):
@@ -79,9 +88,19 @@ async def list_voice_sessions(
        db,
        limit=limit,
        active_only=active_only,
+        active_first=active_first,
    )


+@router.get("/voice-sessions/active", response_model=VoiceSessionSummaryResponse | None)
+async def get_latest_active_voice_session(
+    user: User = Depends(require_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Get the latest active voice session for quick resume behavior."""
+    return await get_latest_active_voice_session_service(user.id, db)
+
+
@router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse)
 async def get_voice_session(
    session_id: str,
@@ -158,6 +177,21 @@ async def get_voice_turn(
    return await get_voice_turn_service(session_id, turn_id, user.id, db)


+@router.post(
+    "/voice-sessions/{session_id}/turns/{turn_id}/retry",
+    response_model=VoiceTurnAcceptedResponse,
+    status_code=status.HTTP_202_ACCEPTED,
+)
+async def retry_voice_turn(
+    session_id: str,
+    turn_id: str,
+    user: User = Depends(require_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Retry one failed voice turn using its saved transcript."""
+    return await retry_voice_turn_service(session_id, turn_id, user.id, db)
+
+
@router.get("/voice-sessions/{session_id}/turns/{turn_id}/audio")
 async def get_voice_turn_audio(
    session_id: str,
@@ -170,6 +204,20 @@ async def get_voice_turn_audio(
    return Response(content=audio_bytes, media_type="audio/mpeg")


+@router.post(
+    "/voice-sessions/{session_id}/turns/{turn_id}/retry-audio",
+    response_model=VoiceTurnSummaryResponse,
+)
+async def retry_voice_turn_audio(
+    session_id: str,
+    turn_id: str,
+    user: User = Depends(require_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Retry assistant audio synthesis when one turn only has text output."""
+    return await retry_voice_turn_audio_service(session_id, turn_id, user.id, db)
+
+
@router.get("/voice-sessions/{session_id}/turns/{turn_id}/user-audio")
 async def get_voice_turn_user_audio(
    session_id: str,
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -82,6 +82,18 @@ class Settings(BaseSettings):
        "zh",
        description="Preferred language hint for voice transcription",
    )
+    voice_turn_max_upload_bytes: int = Field(
+        5 * 1024 * 1024,
+        description="Maximum accepted upload size in bytes for one voice turn audio file",
+    )
+    voice_session_default_list_limit: int = Field(
+        8,
+        description="Default number of recent voice sessions returned to the client",
+    )
+    voice_session_max_list_limit: int = Field(
+        20,
+        description="Maximum number of recent voice sessions returned to the client",
+    )
    story_audio_cache_ttl_days: int = Field(
        30,
        description="TTL in days before cached story audio is pruned",
--- a/backend/app/schemas/voice_session_schemas.py
+++ b/backend/app/schemas/voice_session_schemas.py
@@ -101,6 +101,7 @@ class VoiceSessionSummaryResponse(BaseModel):
    latest_detected_intent: str | None = None
    latest_assistant_audio_ready: bool = False
    last_turn_status: str | None = None
+    transcription_mode_hint: str | None = None
    can_continue: bool = False
    can_finalize: bool = False
    last_error: str | None = None
--- a/backend/app/services/voice_session_service.py
+++ b/backend/app/services/voice_session_service.py
@@ -6,9 +6,10 @@ from datetime import datetime, timezone
 from typing import Any

 from fastapi import HTTPException
-from sqlalchemy import desc, select
+from sqlalchemy import case, desc, select
 from sqlalchemy.ext.asyncio import AsyncSession

+from app.core.config import settings
 from app.core.logging import get_logger
 from app.db.models import VoiceSession, VoiceSessionEvent, VoiceTurn
 from app.schemas.voice_session_schemas import (
@@ -134,6 +135,7 @@ def _session_to_summary(
            session_audio_exists(latest_turn.assistant_audio_path) if latest_turn else False
        ),
        last_turn_status=latest_turn.status if latest_turn else None,
+        transcription_mode_hint=settings.voice_transcription_mode,
        can_continue=_session_can_continue(session),
        can_finalize=_session_can_finalize(session),
        last_error=session.last_error,
@@ -602,17 +604,29 @@ async def list_voice_sessions_service(
    user_id: str,
    db: AsyncSession,
    *,
-    limit: int = 8,
+    limit: int | None = None,
    active_only: bool = False,
+    active_first: bool = False,
 ) -> list[VoiceSessionSummaryResponse]:
-    query = (
-        select(VoiceSession)
-        .where(VoiceSession.user_id == user_id)
-        .order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
-        .limit(limit)
-    )
+    resolved_limit = limit or settings.voice_session_default_list_limit
+    resolved_limit = max(1, min(resolved_limit, settings.voice_session_max_list_limit))
+    query = select(VoiceSession).where(VoiceSession.user_id == user_id)
    if active_only:
        query = query.where(VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES))
+    if active_first:
+        query = query.order_by(
+            desc(
+                case(
+                    (VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES), 1),
+                    else_=0,
+                )
+            ),
+            desc(VoiceSession.updated_at),
+            desc(VoiceSession.created_at),
+        )
+    else:
+        query = query.order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
+    query = query.limit(resolved_limit)

    sessions = (await db.execute(query)).scalars().all()
    summaries: list[VoiceSessionSummaryResponse] = []
@@ -628,6 +642,30 @@ async def list_voice_sessions_service(
    return summaries


+async def get_latest_active_voice_session_service(
+    user_id: str,
+    db: AsyncSession,
+) -> VoiceSessionSummaryResponse | None:
+    query = (
+        select(VoiceSession)
+        .where(
+            VoiceSession.user_id == user_id,
+            VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES),
+        )
+        .order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
+        .limit(1)
+    )
+    session = (await db.execute(query)).scalar_one_or_none()
+    if session is None:
+        return None
+    latest_turn = await _get_latest_turn(db, session_id=session.id)
+    return _session_to_summary(
+        session,
+        latest_turn=latest_turn,
+        total_turns=session.current_turn_index,
+    )
+
+
 async def create_voice_session_service(
    request: VoiceSessionCreateRequest,
    user_id: str,
@@ -766,6 +804,13 @@ async def create_voice_turn_from_upload_service(
            status_code=409,
            detail="Voice session is not ready for another turn.",
        )
+    if not audio_bytes:
+        raise HTTPException(status_code=400, detail="上传音频为空，请重新录音后再试。")
+    if len(audio_bytes) > settings.voice_turn_max_upload_bytes:
+        raise HTTPException(
+            status_code=413,
+            detail="上传音频过大，请缩短单轮录音时长后再试。",
+        )
    next_turn_index = session.current_turn_index + 1
    user_audio_path = write_uploaded_user_audio(
        session_id=session.id,
@@ -805,6 +850,91 @@ async def create_voice_turn_from_upload_service(
    )


+async def retry_voice_turn_service(
+    session_id: str,
+    turn_id: str,
+    user_id: str,
+    db: AsyncSession,
+) -> VoiceTurnAcceptedResponse:
+    turn = await _get_owned_turn(
+        db,
+        session_id=session_id,
+        turn_id=turn_id,
+        user_id=user_id,
+    )
+    if turn.status != "failed":
+        raise HTTPException(status_code=409, detail="Only failed turns can be retried.")
+    if not turn.user_transcript:
+        raise HTTPException(status_code=409, detail="This turn has no transcript to retry.")
+
+    return await create_voice_turn_from_text_service(
+        session_id,
+        VoiceTurnCreateFallbackRequest(
+            transcript_text=turn.user_transcript,
+            duration_ms=turn.user_audio_duration_ms,
+        ),
+        user_id,
+        db,
+    )
+
+
+async def retry_voice_turn_audio_service(
+    session_id: str,
+    turn_id: str,
+    user_id: str,
+    db: AsyncSession,
+) -> VoiceTurnSummaryResponse:
+    turn = await _get_owned_turn(
+        db,
+        session_id=session_id,
+        turn_id=turn_id,
+        user_id=user_id,
+    )
+    if not turn.assistant_text:
+        raise HTTPException(status_code=409, detail="This turn has no assistant text to speak.")
+    if session_audio_exists(turn.assistant_audio_path):
+        raise HTTPException(status_code=409, detail="Assistant audio already exists for this turn.")
+
+    try:
+        audio_bytes = await text_to_speech(
+            turn.assistant_text,
+            db=db,
+            user_id=user_id,
+        )
+        saved_path = write_session_audio(
+            build_turn_assistant_audio_path(turn.session_id, turn.turn_index),
+            audio_bytes,
+        )
+        turn.assistant_audio_path = saved_path
+        turn.assistant_audio_duration_ms = None
+        if turn.status == "narrative_ready":
+            turn.status = "audio_ready"
+        await db.commit()
+        await db.refresh(turn)
+        await _record_session_event(
+            db,
+            session_id=turn.session_id,
+            turn_id=turn.id,
+            event_type="assistant_audio_retry_succeeded",
+            status="succeeded",
+            message="Assistant audio regenerated for one voice turn.",
+            metadata={"audio_path": saved_path},
+        )
+    except Exception as exc:
+        await _record_session_event(
+            db,
+            session_id=turn.session_id,
+            turn_id=turn.id,
+            event_type="assistant_audio_retry_failed",
+            status="failed",
+            message="Assistant audio retry failed.",
+            metadata={"error": str(exc)},
+        )
+        raise HTTPException(status_code=503, detail="语音补发失败，请稍后再试。") from exc
+
+    return _turn_to_summary(turn)
+
+
 async def get_voice_turn_service(
    session_id: str,
    turn_id: str,
--- a/backend/tests/test_voice_sessions.py
+++ b/backend/tests/test_voice_sessions.py
@@ -2,6 +2,7 @@ from unittest.mock import AsyncMock, patch

 from httpx import ASGITransport, AsyncClient

+from app.core.config import settings
 from app.db.database import get_db
 from app.main import app
 from app.services.adapters.text.models import StoryOutput
@@ -343,3 +344,221 @@ async def test_voice_session_list_orders_recent_sessions_first(
                }
        finally:
            app.dependency_overrides.clear()
+
+
+async def test_voice_session_active_endpoint_returns_latest_active_session(
+    db_session,
+    auth_token,
+):
+    """GET /api/voice-sessions/active returns the still-continuable session.
+
+    One session is created and abandoned, a second one receives a text turn;
+    the endpoint is expected to report the second session.
+    """
+    async def override_get_db():
+        yield db_session
+
+    app.dependency_overrides[get_db] = override_get_db
+
+    # Story generation and TTS are mocked so the turn completes without providers.
+    with (
+        patch(
+            "app.services.voice_session_service.generate_story_content",
+            new_callable=AsyncMock,
+        ) as mock_generate,
+        patch(
+            "app.services.voice_session_service.text_to_speech",
+            new_callable=AsyncMock,
+        ) as mock_tts,
+    ):
+        mock_generate.return_value = StoryOutput(
+            mode="generated",
+            title="活动会话",
+            story_text="一段活动中的故事。",
+            cover_prompt_suggestion="活动会话封面",
+        )
+        mock_tts.return_value = b"active-audio"
+
+        transport = ASGITransport(app=app)
+        try:
+            async with AsyncClient(transport=transport, base_url="http://test") as client:
+                client.cookies.set("access_token", auth_token)
+
+                # First session: created, then abandoned.
+                response = await client.post("/api/voice-sessions", json={})
+                old_session_id = response.json()["id"]
+                await client.post(
+                    f"/api/voice-sessions/{old_session_id}/abandon",
+                    json={"reason": "旧会话结束"},
+                )
+
+                # Second session: created and given one text turn.
+                response = await client.post("/api/voice-sessions", json={})
+                active_session_id = response.json()["id"]
+                await client.post(
+                    f"/api/voice-sessions/{active_session_id}/turns/fallback",
+                    json={"transcript_text": "请继续一个新故事"},
+                )
+
+                response = await client.get("/api/voice-sessions/active")
+                assert response.status_code == 200
+                data = response.json()
+                assert data["id"] == active_session_id
+                assert data["can_continue"] is True
+                assert data["status"] == "waiting_user"
+        finally:
+            # Always drop the DB override so later tests get the real dependency.
+            app.dependency_overrides.clear()
+
+
+async def test_voice_session_can_retry_failed_turn_from_saved_transcript(
+    db_session,
+    auth_token,
+):
+    """POST .../turns/{turn_id}/retry replays a failed turn as a new turn.
+
+    The first generation call raises, leaving the turn ``failed``; the retry
+    is expected to yield a different turn id that ends up ``audio_ready``.
+    """
+    async def override_get_db():
+        yield db_session
+
+    app.dependency_overrides[get_db] = override_get_db
+
+    with (
+        patch(
+            "app.services.voice_session_service.generate_story_content",
+            new_callable=AsyncMock,
+        ) as mock_generate,
+        patch(
+            "app.services.voice_session_service.text_to_speech",
+            new_callable=AsyncMock,
+        ) as mock_tts,
+    ):
+        # First call fails (original turn), second call succeeds (the retry).
+        mock_generate.side_effect = [
+            RuntimeError("provider down"),
+            StoryOutput(
+                mode="generated",
+                title="重试成功",
+                story_text="重试后的故事终于顺利继续了。",
+                cover_prompt_suggestion="重试封面",
+            ),
+        ]
+        mock_tts.return_value = b"retry-turn-audio"
+
+        transport = ASGITransport(app=app)
+        try:
+            async with AsyncClient(transport=transport, base_url="http://test") as client:
+                client.cookies.set("access_token", auth_token)
+
+                response = await client.post("/api/voice-sessions", json={})
+                session_id = response.json()["id"]
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/fallback",
+                    json={"transcript_text": "先讲一个会失败的回合"},
+                )
+                assert response.status_code == 202
+                failed_turn_id = response.json()["turn_id"]
+
+                # The turn is accepted (202) but must be reported as failed afterwards.
+                response = await client.get(
+                    f"/api/voice-sessions/{session_id}/turns/{failed_turn_id}"
+                )
+                assert response.status_code == 200
+                assert response.json()["status"] == "failed"
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/{failed_turn_id}/retry"
+                )
+                assert response.status_code == 202
+                retried_turn_id = response.json()["turn_id"]
+                # The retry is expected to produce a new turn, not reuse the failed one.
+                assert retried_turn_id != failed_turn_id
+
+                response = await client.get(
+                    f"/api/voice-sessions/{session_id}/turns/{retried_turn_id}"
+                )
+                assert response.status_code == 200
+                retried_turn = response.json()
+                assert retried_turn["status"] == "audio_ready"
+                assert retried_turn["assistant_text"] == "重试后的故事终于顺利继续了。"
+        finally:
+            # Always drop the DB override so later tests get the real dependency.
+            app.dependency_overrides.clear()
+
+
+async def test_voice_session_can_retry_missing_assistant_audio(
+    db_session,
+    auth_token,
+):
+    """POST .../turns/{turn_id}/retry-audio fills in audio for a text-only turn.
+
+    TTS raises on the first call, so the turn stays ``narrative_ready`` without
+    audio; the retry is expected to move it to ``audio_ready``.
+    """
+    async def override_get_db():
+        yield db_session
+
+    app.dependency_overrides[get_db] = override_get_db
+
+    with (
+        patch(
+            "app.services.voice_session_service.generate_story_content",
+            new_callable=AsyncMock,
+        ) as mock_generate,
+        patch(
+            "app.services.voice_session_service.text_to_speech",
+            new_callable=AsyncMock,
+        ) as mock_tts,
+    ):
+        mock_generate.return_value = StoryOutput(
+            mode="generated",
+            title="补发语音",
+            story_text="这一轮先有文本，稍后再补发语音。",
+            cover_prompt_suggestion="补发语音封面",
+        )
+        # First TTS call fails (during the turn), second succeeds (the audio retry).
+        mock_tts.side_effect = [RuntimeError("tts down"), b"recovered-audio"]
+
+        transport = ASGITransport(app=app)
+        try:
+            async with AsyncClient(transport=transport, base_url="http://test") as client:
+                client.cookies.set("access_token", auth_token)
+
+                response = await client.post("/api/voice-sessions", json={})
+                session_id = response.json()["id"]
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/fallback",
+                    json={"transcript_text": "请先给我一段只有文本的结果"},
+                )
+                assert response.status_code == 202
+                turn_id = response.json()["turn_id"]
+
+                # Text is ready but audio is missing after the failed TTS call.
+                response = await client.get(
+                    f"/api/voice-sessions/{session_id}/turns/{turn_id}"
+                )
+                assert response.status_code == 200
+                turn = response.json()
+                assert turn["status"] == "narrative_ready"
+                assert turn["assistant_audio_ready"] is False
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/{turn_id}/retry-audio"
+                )
+                assert response.status_code == 200
+                retried = response.json()
+                assert retried["status"] == "audio_ready"
+                assert retried["assistant_audio_ready"] is True
+        finally:
+            # Always drop the DB override so later tests get the real dependency.
+            app.dependency_overrides.clear()
+
+
+async def test_voice_session_uploaded_audio_respects_size_limit(
+    db_session,
+    auth_token,
+    monkeypatch,
+):
+    """Uploading audio larger than ``voice_turn_max_upload_bytes`` yields 413."""
+    async def override_get_db():
+        yield db_session
+
+    app.dependency_overrides[get_db] = override_get_db
+    # Shrink the limit to 4 bytes so the 5-byte payload below exceeds it.
+    monkeypatch.setattr(settings, "voice_turn_max_upload_bytes", 4)
+
+    transport = ASGITransport(app=app)
+    try:
+        async with AsyncClient(transport=transport, base_url="http://test") as client:
+            client.cookies.set("access_token", auth_token)
+
+            response = await client.post("/api/voice-sessions", json={})
+            assert response.status_code == 201
+            session_id = response.json()["id"]
+
+            response = await client.post(
+                f"/api/voice-sessions/{session_id}/turns",
+                files={
+                    "audio_file": ("turn.webm", b"12345", "audio/webm"),
+                },
+                data={"transcript_hint": "太长了"},
+            )
+            assert response.status_code == 413
+    finally:
+        # Always drop the DB override so later tests get the real dependency.
+        app.dependency_overrides.clear()
--- a/frontend/src/types/voiceSession.ts
+++ b/frontend/src/types/voiceSession.ts
@@ -45,6 +45,7 @@ export interface VoiceSessionSummary {
  latest_detected_intent: string | null
  latest_assistant_audio_ready: boolean
  last_turn_status: string | null
+  transcription_mode_hint: string | null
  can_continue: boolean
  can_finalize: boolean
  last_error: string | null
--- a/frontend/src/views/Home.vue
+++ b/frontend/src/views/Home.vue
@@ -1,8 +1,10 @@
 <script setup lang="ts">
-import { ref } from 'vue'
+import { onMounted, ref, watch } from 'vue'
 import { useRouter } from 'vue-router'
 import { useI18n } from 'vue-i18n'
 import { useUserStore } from '../stores/user'
+import { api } from '../api/client'
+import type { VoiceSessionSummary } from '../types/voiceSession'
 import BaseButton from '../components/ui/BaseButton.vue'
 import LoginDialog from '../components/ui/LoginDialog.vue'
 import {
@@ -25,6 +27,7 @@ function switchLocale(lang: 'en' | 'zh') {

 // ========== 登录对话框状态 ==========
 const showLoginDialog = ref(false)
+const activeVoiceSession = ref<VoiceSessionSummary | null>(null)

 // ========== 创作入口 ==========
 // 旧的创作变量已移除，现在只负责跳转
@@ -45,6 +48,26 @@ function openVoiceStudio() {
  router.push('/voice-studio')
 }

+// Resume entry point: navigate straight to the studio when an active session
+// is known, otherwise defer to the regular "open voice studio" handler.
+function continueVoiceStudio() {
+  if (activeVoiceSession.value) {
+    router.push('/voice-studio')
+    return
+  }
+  openVoiceStudio()
+}
+
+// Refresh `activeVoiceSession` from the backend. It is reset to null when no
+// user is signed in or when the request fails.
+async function loadActiveVoiceSession() {
+  let latest: VoiceSessionSummary | null = null
+  if (userStore.user) {
+    try {
+      latest = await api.get<VoiceSessionSummary | null>('/api/voice-sessions/active')
+    } catch {
+      latest = null
+    }
+  }
+  activeVoiceSession.value = latest
+}
+
 function scrollToFeatures() {
  document.getElementById('features')?.scrollIntoView({ behavior: 'smooth' })
 }
@@ -54,6 +77,20 @@ function scrollToFeatures() {
 // const familiesCount = 5000
 // const satisfactionCount = 99

+// On mount: make sure the user session has been fetched, then look up any
+// resumable voice session.
+onMounted(async () => {
+  const sessionMissing = !userStore.user
+  if (sessionMissing) {
+    await userStore.fetchSession()
+  }
+  await loadActiveVoiceSession()
+})
+
+// Re-query the active voice session whenever the signed-in user id changes.
+watch(
+  () => userStore.user?.id,
+  () => void loadActiveVoiceSession(),
+)
+
 </script>

 <template>
@@ -152,6 +189,15 @@ function scrollToFeatures() {
                    <MicrophoneIcon class="h-5 w-5 mr-2" />
                    进入语音共创
                </BaseButton>
+                <BaseButton
+                  v-if="activeVoiceSession"
+                  size="lg"
+                  variant="ghost"
+                  @click="continueVoiceStudio"
+                >
+                    <ArrowRightOnRectangleIcon class="h-5 w-5 mr-2" />
+                    继续语音共创
+                </BaseButton>
                <button @click="scrollToFeatures" class="px-6 py-3 rounded-xl font-semibold text-stone-600 bg-white border border-stone-200 hover:border-amber-400 hover:text-amber-700 transition-all shadow-sm">
                   了解更多功能
                </button>
--- a/frontend/src/views/MyStories.vue
+++ b/frontend/src/views/MyStories.vue
@@ -8,6 +8,7 @@ import BaseCard from '../components/ui/BaseCard.vue'
 import EmptyState from '../components/ui/EmptyState.vue'
 import LoadingSpinner from '../components/ui/LoadingSpinner.vue'
 import type { GenerationOpsSummary, GenerationProviderAnalytics } from '../types/generation'
+import type { VoiceSessionSummary } from '../types/voiceSession'
 import {
  getAssetStatusMeta,
  getGenerationStatusMeta,
@@ -40,6 +41,7 @@ const router = useRouter()
 const stories = ref<StoryItem[]>([])
 const providerAnalytics = ref<GenerationProviderAnalytics | null>(null)
 const opsSummary = ref<GenerationOpsSummary | null>(null)
+const activeVoiceSession = ref<VoiceSessionSummary | null>(null)
 const loading = ref(true)
 const error = ref('')
 const showCreateModal = ref(false)
@@ -76,14 +78,16 @@ function buildProviderAnalyticsPath() {

 async function fetchStories() {
  try {
-    const [storyList, analytics, ops] = await Promise.all([
+    const [storyList, analytics, ops, activeSession] = await Promise.all([
      api.get<StoryItem[]>('/api/stories'),
      api.get<GenerationProviderAnalytics>(buildProviderAnalyticsPath()),
      api.get<GenerationOpsSummary>('/api/generations/ops-summary'),
+      api.get<VoiceSessionSummary | null>('/api/voice-sessions/active').catch(() => null),
    ])
    stories.value = storyList
    providerAnalytics.value = analytics
    opsSummary.value = ops
+    activeVoiceSession.value = activeSession
  } catch (e) {
    error.value = e instanceof Error ? e.message : '加载失败'
  } finally {
@@ -112,6 +116,10 @@ function goToCreate() {
  showCreateModal.value = true
 }

+// Navigate to the voice studio page (used by the "continue" banner).
+function goToVoiceStudio() {
+  const voiceStudioPath = '/voice-studio'
+  router.push(voiceStudioPath)
+}
+
 function getStoryLink(story: StoryItem) {
  return story.mode === 'storybook' ? `/storybook/view/${story.id}` : `/story/${story.id}`
 }
@@ -200,6 +208,27 @@ watch([selectedWindow, selectedCapability], () => {
    </div>

    <template v-else>
+      <BaseCard
+        v-if="activeVoiceSession"
+        class="mb-8 border border-purple-100 bg-purple-50/60"
+        padding="lg"
+      >
+        <div class="flex flex-col gap-4 lg:flex-row lg:items-center lg:justify-between">
+          <div>
+            <h2 class="text-xl font-bold text-gray-800">语音共创正在进行中</h2>
+            <p class="mt-2 text-sm leading-6 text-gray-600">
+              最近的语音共创会话仍可继续：
+              {{ activeVoiceSession.working_title || '未命名语音会话' }}，
+              当前状态 {{ activeVoiceSession.status }}，已完成 {{ activeVoiceSession.total_turns }} 轮。
+            </p>
+          </div>
+          <BaseButton @click="goToVoiceStudio">
+            <SparklesIcon class="h-5 w-5 mr-2" />
+            继续语音共创
+          </BaseButton>
+        </div>
+      </BaseCard>
+
      <BaseCard class="mb-8" padding="lg">
        <div class="grid grid-cols-2 lg:grid-cols-4 gap-4">
          <div class="text-center px-4 py-2">
--- a/frontend/src/views/VoiceStudio.vue
+++ b/frontend/src/views/VoiceStudio.vue
@@ -47,6 +47,7 @@ const profiles = ref<ChildProfile[]>([])
 const universes = ref<StoryUniverse[]>([])
 const selectedProfileId = ref('')
 const selectedUniverseId = ref('')
+const sessionFilter = ref<'active' | 'recent'>('active')
 const textTurnInput = ref('')
 const uploadTranscriptHint = ref('')
 const loadingSessions = ref(false)
@@ -60,12 +61,14 @@ const recordingDurationMs = ref(0)
 const error = ref('')
 const mediaError = ref('')
 const recorderSupported = computed(() => typeof window !== 'undefined' && 'MediaRecorder' in window)
+const sessionPollIntervalMs = 1500

 let mediaRecorder: MediaRecorder | null = null
 let mediaStream: MediaStream | null = null
 let recordingChunks: Blob[] = []
 let recordingTimer: number | null = null
 let recordingStartedAt = 0
+let sessionPollTimer: number | null = null

 const recordedBlob = ref<Blob | null>(null)
 const recordedAudioUrl = ref<string | null>(null)
@@ -78,6 +81,22 @@ const universeOptions = computed(() =>
 )

 const activeTurnList = computed(() => activeSession.value?.recent_turns ?? [])
+// User-facing explanation of the transcription mode reported by the active
+// session summary.
+const transcriptionModeDescription = computed(() => {
+  const modeHint = activeSession.value?.transcription_mode_hint
+  if (modeHint === 'openai') {
+    return '当前会尝试使用 OpenAI 语音转写，转写提示可以留空。'
+  }
+  if (modeHint === 'disabled') {
+    return '当前环境禁用了真实语音转写，请先使用文本共创或填写开发转写提示。'
+  }
+  // Any other value, including a missing hint, falls back to the demo-mode copy.
+  return '当前默认是 demo 转写模式。若本地未接真实 ASR，可在下方填写转写提示辅助开发验证。'
+})
+// True while the active session is processing a turn, or its latest turn is
+// still in the `received` / `transcribing` status.
+const isSessionProcessing = computed(() => {
+  const current = activeSession.value
+  const sessionBusy = current?.status === 'processing_turn'
+  const lastTurnStatus = current?.last_turn_status
+  return sessionBusy || lastTurnStatus === 'received' || lastTurnStatus === 'transcribing'
+})

 function formatSessionStatus(status: string) {
  switch (status) {
@@ -182,8 +201,13 @@ async function loadSessions() {
  if (!userStore.user) return
  loadingSessions.value = true
  try {
-    sessions.value = await api.get<VoiceSessionSummary[]>('/api/voice-sessions?limit=8')
-    if (!activeSession.value) {
+    const params = new URLSearchParams({
+      limit: '8',
+      active_first: 'true',
+      active_only: sessionFilter.value === 'active' ? 'true' : 'false',
+    })
+    sessions.value = await api.get<VoiceSessionSummary[]>(`/api/voice-sessions?${params.toString()}`)
+    if (!activeSession.value && sessionFilter.value === 'recent') {
      const resumable = sessions.value.find((item) => item.can_continue)
      if (resumable) {
        await loadSessionDetail(resumable.id)
@@ -196,6 +220,18 @@ async function loadSessions() {
  }
 }

+// Bootstrap helper: if the backend reports an active session for the signed-in
+// user, load its detail view.
+async function loadLatestActiveSession() {
+  if (!userStore.user) return
+  try {
+    const latest = await api.get<VoiceSessionSummary | null>('/api/voice-sessions/active')
+    if (!latest) return
+    await loadSessionDetail(latest.id)
+  } catch {
+    // Ignore active-session bootstrap failures and fall back to normal listing.
+  }
+}
+
 async function loadSessionDetail(sessionId: string) {
  loadingSessionDetail.value = true
  error.value = ''
@@ -208,6 +244,23 @@ async function loadSessionDetail(sessionId: string) {
  }
 }

+// Cancel the polling interval, if one is running.
+function stopSessionPolling() {
+  if (!sessionPollTimer) return
+  window.clearInterval(sessionPollTimer)
+  sessionPollTimer = null
+}
+
+// Set while one poll tick's requests are still pending, so ticks never overlap.
+let sessionPollInFlight = false
+
+// Start refreshing the active session and the session list every
+// `sessionPollIntervalMs`. Does nothing when there is no active session or a
+// timer already exists.
+function startSessionPolling() {
+  if (!activeSession.value?.id || sessionPollTimer) return
+  sessionPollTimer = window.setInterval(() => {
+    const sessionId = activeSession.value?.id
+    // Skip this tick when the previous one has not finished: otherwise slow
+    // responses pile up and an older response can overwrite newer state.
+    if (!sessionId || sessionPollInFlight) return
+    sessionPollInFlight = true
+    const ignoreFailure = () => undefined
+    void Promise.all([
+      // Polling is best-effort; a failed tick is simply retried on the next one.
+      loadSessionDetail(sessionId).catch(ignoreFailure),
+      loadSessions().catch(ignoreFailure),
+    ]).finally(() => {
+      sessionPollInFlight = false
+    })
+  }, sessionPollIntervalMs)
+}
+
 async function createSession() {
  creatingSession.value = true
  error.value = ''
@@ -217,6 +270,7 @@ async function createSession() {
      universe_id: selectedUniverseId.value || null,
      target_mode: 'story',
    })
+    sessionFilter.value = 'active'
    await loadSessions()
    await loadSessionDetail(session.id)
    textTurnInput.value = ''
@@ -319,6 +373,39 @@ async function finalizeSession() {
  }
 }

+// Ask the backend to retry a failed turn, then refresh the view for the turn
+// it returns. Errors are surfaced through `error`.
+async function retryFailedTurn(turnId: string) {
+  const current = activeSession.value
+  if (!current) return
+  sendingTurn.value = true
+  error.value = ''
+  try {
+    const retryUrl = `/api/voice-sessions/${current.id}/turns/${turnId}/retry`
+    const accepted = await api.post<VoiceTurnAcceptedResponse>(retryUrl)
+    await refreshAfterTurn(accepted.session_id, accepted.turn_id)
+  } catch (caught) {
+    error.value = caught instanceof Error ? caught.message : '重试本轮故事失败'
+  } finally {
+    sendingTurn.value = false
+  }
+}
+
+// Ask the backend to regenerate assistant audio for one turn, then reload the
+// session detail and the session list. Errors are surfaced through `error`.
+async function retryAssistantAudio(turnId: string) {
+  if (!activeSession.value) return
+  sendingTurn.value = true
+  error.value = ''
+  try {
+    const retryAudioUrl = `/api/voice-sessions/${activeSession.value.id}/turns/${turnId}/retry-audio`
+    await api.post<VoiceTurnSummary>(retryAudioUrl)
+    // The active session is read again after the request completes.
+    await loadSessionDetail(activeSession.value.id)
+    await loadSessions()
+  } catch (caught) {
+    error.value = caught instanceof Error ? caught.message : '补发语音失败'
+  } finally {
+    sendingTurn.value = false
+  }
+}
+
 async function abandonSession() {
  if (!activeSession.value) return
  abandoning.value = true
@@ -404,16 +491,34 @@ watch(selectedProfileId, (newId) => {
  }
 })

+// Reload the session list whenever the active/recent filter changes.
+watch(sessionFilter, () => void loadSessions())
+
+// Poll only while the active session is processing; `immediate` applies the
+// current state as soon as the watcher is registered.
+watch(
+  isSessionProcessing,
+  (processing) => {
+    const togglePolling = processing ? startSessionPolling : stopSessionPolling
+    togglePolling()
+  },
+  { immediate: true },
+)
+
+// On mount: resolve the user session, load profiles, open the latest active
+// voice session if there is one, then load the session list.
 onMounted(async () => {
   if (!userStore.user) {
     await userStore.fetchSession()
   }
   if (!userStore.user) return
   await fetchProfiles()
+  // Runs before loadSessions(), so an active session is opened before the list loads.
+  await loadLatestActiveSession()
   await loadSessions()
 })

 onBeforeUnmount(() => {
+  stopSessionPolling()
  if (recording.value) {
    stopRecording()
  }
@@ -488,6 +593,29 @@ onBeforeUnmount(() => {
              <span class="text-xs text-gray-400">{{ sessions.length }} 个</span>
            </div>

+            <div class="mt-4 flex gap-2">
+              <button
+                type="button"
+                class="rounded-lg border px-3 py-1.5 text-sm transition-colors"
+                :class="sessionFilter === 'active'
+                  ? 'border-purple-600 bg-purple-600 text-white'
+                  : 'border-gray-200 bg-white text-gray-600 hover:border-gray-400'"
+                @click="sessionFilter = 'active'"
+              >
+                活跃会话
+              </button>
+              <button
+                type="button"
+                class="rounded-lg border px-3 py-1.5 text-sm transition-colors"
+                :class="sessionFilter === 'recent'
+                  ? 'border-purple-600 bg-purple-600 text-white'
+                  : 'border-gray-200 bg-white text-gray-600 hover:border-gray-400'"
+                @click="sessionFilter = 'recent'"
+              >
+                最近全部
+              </button>
+            </div>
+
            <div v-if="loadingSessions" class="py-8">
              <LoadingSpinner text="加载会话中..." />
            </div>
@@ -558,6 +686,13 @@ onBeforeUnmount(() => {
                </p>
              </div>

+              <div
+                v-if="isSessionProcessing"
+                class="rounded-2xl border border-amber-100 bg-amber-50 px-4 py-3 text-sm text-amber-700"
+              >
+                当前会话仍在处理中，页面会自动轮询刷新。
+              </div>
+
              <div class="flex flex-wrap gap-3">
                <BaseButton
                  variant="secondary"
@@ -614,7 +749,7 @@ onBeforeUnmount(() => {
                    <span class="text-xs text-gray-400">已支持上传音频 turn</span>
                  </div>
                  <p class="mt-2 text-sm text-gray-500">
-                    当前默认是 demo 转写模式。若本地未接真实 ASR，可在下方填写转写提示辅助开发验证。
+                    {{ transcriptionModeDescription }}
                  </p>

                  <div class="mt-4 flex flex-wrap gap-3">
@@ -714,6 +849,28 @@ onBeforeUnmount(() => {
                      <div v-if="turn.assistant_audio_url" class="mt-3">
                        <audio class="w-full" :src="turn.assistant_audio_url" controls></audio>
                      </div>
+                      <div class="mt-4 flex flex-wrap gap-3">
+                        <BaseButton
+                          v-if="turn.status === 'failed'"
+                          size="sm"
+                          variant="secondary"
+                          @click="retryFailedTurn(turn.id)"
+                          :disabled="sendingTurn || !activeSession?.can_continue"
+                        >
+                          <ArrowPathIcon class="h-4 w-4" />
+                          重试本轮
+                        </BaseButton>
+                        <BaseButton
+                          v-if="turn.assistant_text && !turn.assistant_audio_ready"
+                          size="sm"
+                          variant="ghost"
+                          @click="retryAssistantAudio(turn.id)"
+                          :disabled="sendingTurn"
+                        >
+                          <ArrowPathIcon class="h-4 w-4" />
+                          补发语音
+                        </BaseButton>
+                      </div>
                      <div v-if="turn.error_message" class="mt-3 text-sm text-rose-600">
                        {{ turn.error_message }}
                      </div>