feat: add voice co-creation session skeleton

2026-04-19 22:54:48 +08:00
parent 7c85d7a2e7
commit f106f740dd
10 changed files with 1540 additions and 0 deletions
--- a/backend/alembic/versions/0013_add_voice_sessions_phase_a.py
+++ b/backend/alembic/versions/0013_add_voice_sessions_phase_a.py
@@ -0,0 +1,187 @@
 """add voice co-creation phase a tables
 Revision ID: 0013_add_voice_sessions_phase_a
 Revises: 0012_story_text_status
 Create Date: 2026-04-19
 """
 import sqlalchemy as sa
 from alembic import op
 # Alembic revision identifiers: this migration is applied directly on top of
 # revision 0012_story_text_status and declares no branch labels or extra dependencies.
 revision = "0013_add_voice_sessions_phase_a"
 down_revision = "0012_story_text_status"
 branch_labels = None
 depends_on = None
 def upgrade() -> None:
    """Create voice_sessions, voice_turns and voice_session_events plus their indexes.

    Tables are created parent-first so every foreign key target already exists,
    and each table's single-column indexes are created right after the table.
    """

    def _timestamp(column_name: str) -> sa.Column:
        # Timezone-aware timestamp column whose server default is now().
        return sa.Column(
            column_name,
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
        )

    def _index_each(table_name: str, *column_names: str) -> None:
        # One single-column index per name, called ix_<table>_<column>.
        for column_name in column_names:
            op.create_index(
                f"ix_{table_name}_{column_name}",
                table_name,
                [column_name],
            )

    # --- voice_sessions: one row per co-creation session -------------------
    op.create_table(
        "voice_sessions",
        sa.Column("id", sa.String(36), nullable=False),
        sa.Column("user_id", sa.String(255), nullable=False),
        sa.Column("child_profile_id", sa.String(36), nullable=True),
        sa.Column("universe_id", sa.String(36), nullable=True),
        sa.Column("final_story_id", sa.Integer(), nullable=True),
        sa.Column("target_mode", sa.String(32), nullable=False, server_default="story"),
        sa.Column("status", sa.String(32), nullable=False, server_default="draft"),
        sa.Column("current_turn_index", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("working_title", sa.String(255), nullable=True),
        sa.Column("story_state", sa.JSON(), nullable=False, server_default="{}"),
        sa.Column("latest_user_transcript", sa.Text(), nullable=True),
        sa.Column("latest_assistant_text", sa.Text(), nullable=True),
        sa.Column("last_error", sa.Text(), nullable=True),
        _timestamp("created_at"),
        _timestamp("updated_at"),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(
            ["child_profile_id"], ["child_profiles.id"], ondelete="SET NULL"
        ),
        sa.ForeignKeyConstraint(
            ["universe_id"], ["story_universes.id"], ondelete="SET NULL"
        ),
        sa.ForeignKeyConstraint(
            ["final_story_id"], ["stories.id"], ondelete="SET NULL"
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    _index_each(
        "voice_sessions",
        "user_id",
        "child_profile_id",
        "universe_id",
        "final_story_id",
        "status",
        "created_at",
    )

    # --- voice_turns: one row per user/assistant exchange ------------------
    op.create_table(
        "voice_turns",
        sa.Column("id", sa.String(36), nullable=False),
        sa.Column("session_id", sa.String(36), nullable=False),
        sa.Column("turn_index", sa.Integer(), nullable=False),
        sa.Column("status", sa.String(32), nullable=False, server_default="received"),
        sa.Column("user_audio_path", sa.String(500), nullable=True),
        sa.Column("user_audio_mime_type", sa.String(100), nullable=True),
        sa.Column("user_audio_duration_ms", sa.Integer(), nullable=True),
        sa.Column("user_transcript", sa.Text(), nullable=True),
        sa.Column("transcript_confidence", sa.Float(), nullable=True),
        sa.Column(
            "detected_intent", sa.String(32), nullable=False, server_default="unknown"
        ),
        sa.Column("intent_confidence", sa.Float(), nullable=True),
        sa.Column("story_patch", sa.JSON(), nullable=False, server_default="{}"),
        sa.Column("assistant_text", sa.Text(), nullable=True),
        sa.Column("assistant_audio_path", sa.String(500), nullable=True),
        sa.Column("assistant_audio_duration_ms", sa.Integer(), nullable=True),
        sa.Column("error_message", sa.Text(), nullable=True),
        _timestamp("created_at"),
        _timestamp("updated_at"),
        sa.ForeignKeyConstraint(["session_id"], ["voice_sessions.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        # A session may hold at most one turn per turn_index.
        sa.UniqueConstraint(
            "session_id", "turn_index", name="uq_voice_turn_session_turn_index"
        ),
    )
    _index_each("voice_turns", "session_id", "status", "created_at")

    # --- voice_session_events: event log rows tied to a session ------------
    op.create_table(
        "voice_session_events",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("session_id", sa.String(36), nullable=False),
        sa.Column("turn_id", sa.String(36), nullable=True),
        sa.Column("event_type", sa.String(64), nullable=False),
        sa.Column("status", sa.String(32), nullable=False),
        sa.Column("message", sa.Text(), nullable=True),
        sa.Column("event_metadata", sa.JSON(), nullable=False, server_default="{}"),
        _timestamp("created_at"),
        sa.ForeignKeyConstraint(["session_id"], ["voice_sessions.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["turn_id"], ["voice_turns.id"], ondelete="SET NULL"),
        sa.PrimaryKeyConstraint("id"),
    )
    _index_each("voice_session_events", "session_id", "turn_id", "created_at")
 def downgrade() -> None:
    """Drop the voice co-creation tables and indexes in reverse creation order."""
    # Child tables first; for each table the indexes are dropped before the table itself.
    teardown_plan = (
        ("voice_session_events", ("created_at", "turn_id", "session_id")),
        ("voice_turns", ("created_at", "status", "session_id")),
        (
            "voice_sessions",
            (
                "created_at",
                "status",
                "final_story_id",
                "universe_id",
                "child_profile_id",
                "user_id",
            ),
        ),
    )
    for table_name, indexed_columns in teardown_plan:
        for column_name in indexed_columns:
            op.drop_index(f"ix_{table_name}_{column_name}", table_name=table_name)
        op.drop_table(table_name)
--- a/backend/app/api/voice_sessions.py
+++ b/backend/app/api/voice_sessions.py
@@ -0,0 +1,137 @@
 """Voice co-creation session APIs."""
 from fastapi import APIRouter, Depends, Response, status
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.core.deps import require_user
 from app.core.rate_limiter import check_rate_limit
 from app.db.database import get_db
 from app.db.models import User
 from app.schemas.voice_session_schemas import (
    VoiceSessionAbandonRequest,
    VoiceSessionCreateRequest,
    VoiceSessionDetailResponse,
    VoiceSessionFinalizeRequest,
    VoiceSessionFinalizeResponse,
    VoiceSessionSummaryResponse,
    VoiceTurnAcceptedResponse,
    VoiceTurnCreateFallbackRequest,
    VoiceTurnSummaryResponse,
 )
 from app.services.voice_session_service import (
    abandon_voice_session_service,
    create_voice_session_service,
    create_voice_turn_from_text_service,
    finalize_voice_session_service,
    get_voice_session_detail_service,
    get_voice_turn_audio_service,
    get_voice_turn_service,
 )
 # Router that collects every voice co-creation endpoint defined in this module.
 router = APIRouter()
 # Limits handed to check_rate_limit by the session-creation and fallback-turn endpoints.
 # NOTE(review): presumably "20 requests per 60-second window" — confirm the units
 # against check_rate_limit.
 VOICE_SESSION_RATE_LIMIT_WINDOW = 60
 VOICE_SESSION_RATE_LIMIT_REQUESTS = 20
@router.post(
    "/voice-sessions",
    response_model=VoiceSessionSummaryResponse,
    status_code=status.HTTP_201_CREATED,
 )
 async def create_voice_session(
    request: VoiceSessionCreateRequest,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
 ):
    """Create one draft voice co-creation session."""
    await check_rate_limit(
        f"voice-session:{user.id}",
        VOICE_SESSION_RATE_LIMIT_REQUESTS,
        VOICE_SESSION_RATE_LIMIT_WINDOW,
    )
    return await create_voice_session_service(request, user.id, db)
@router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse)
 async def get_voice_session(
    session_id: str,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
 ):
    """Get one voice co-creation session with recent turns and events."""
    return await get_voice_session_detail_service(session_id, user.id, db)
@router.post(
    "/voice-sessions/{session_id}/turns/fallback",
    response_model=VoiceTurnAcceptedResponse,
    status_code=status.HTTP_202_ACCEPTED,
 )
 async def create_voice_turn_from_text(
    session_id: str,
    request: VoiceTurnCreateFallbackRequest,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
 ):
    """Create one turn using text fallback before real audio upload is added."""
    await check_rate_limit(
        f"voice-turn:{user.id}",
        VOICE_SESSION_RATE_LIMIT_REQUESTS,
        VOICE_SESSION_RATE_LIMIT_WINDOW,
    )
    return await create_voice_turn_from_text_service(session_id, request, user.id, db)
@router.get(
    "/voice-sessions/{session_id}/turns/{turn_id}",
    response_model=VoiceTurnSummaryResponse,
 )
 async def get_voice_turn(
    session_id: str,
    turn_id: str,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
 ):
    """Get one processed turn within a voice session."""
    return await get_voice_turn_service(session_id, turn_id, user.id, db)
@router.get("/voice-sessions/{session_id}/turns/{turn_id}/audio")
 async def get_voice_turn_audio(
    session_id: str,
    turn_id: str,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
 ):
    """Get synthesized assistant audio for one completed voice turn."""
    audio_bytes = await get_voice_turn_audio_service(session_id, turn_id, user.id, db)
    return Response(content=audio_bytes, media_type="audio/mpeg")
@router.post(
    "/voice-sessions/{session_id}/finalize",
    response_model=VoiceSessionFinalizeResponse,
 )
 async def finalize_voice_session(
    session_id: str,
    request: VoiceSessionFinalizeRequest,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
 ):
    """Finalize one voice session into a persisted story."""
    return await finalize_voice_session_service(session_id, request, user.id, db)
@router.post(
    "/voice-sessions/{session_id}/abandon",
    response_model=VoiceSessionSummaryResponse,
 )
 async def abandon_voice_session(
    session_id: str,
    request: VoiceSessionAbandonRequest,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
 ):
    """Abandon one in-progress voice session without saving it as a story."""
    return await abandon_voice_session_service(session_id, request, user.id, db)
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -66,6 +66,10 @@ class Settings(BaseSettings):
        "storage/audio",
        description="Directory for cached story audio files",
    )
    # Root directory for voice co-creation session assets; the default is a relative path.
    # NOTE(review): presumably resolved against the process working directory — confirm.
    voice_session_storage_dir: str = Field(
        "storage/voice_sessions",
        description="Directory for persisted voice co-creation session assets",
    )
    story_audio_cache_ttl_days: int = Field(
        30,
        description="TTL in days before cached story audio is pruned",
--- a/backend/app/db/models.py
+++ b/backend/app/db/models.py
@@ -168,6 +168,95 @@ class GenerationJobEvent(Base):
    )
 class VoiceSession(Base):
    """Voice co-creation session before it is finalized as a formal story."""
    __tablename__ = "voice_sessions"
    # Primary key; populated through the module's _uuid default when not supplied.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=_uuid)
    # Owning user; the session row is removed with the user (ON DELETE CASCADE).
    user_id: Mapped[str] = mapped_column(
        String(255), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
    )
    # Optional links; each is reset to NULL when the referenced row is deleted.
    child_profile_id: Mapped[str | None] = mapped_column(
        String(36), ForeignKey("child_profiles.id", ondelete="SET NULL"), nullable=True, index=True
    )
    universe_id: Mapped[str | None] = mapped_column(
        String(36), ForeignKey("story_universes.id", ondelete="SET NULL"), nullable=True, index=True
    )
    final_story_id: Mapped[int | None] = mapped_column(
        Integer, ForeignKey("stories.id", ondelete="SET NULL"), nullable=True, index=True
    )
    # Session bookkeeping; defaults are applied on the Python side at insert time.
    target_mode: Mapped[str] = mapped_column(String(32), nullable=False, default="story")
    status: Mapped[str] = mapped_column(String(32), nullable=False, default="draft", index=True)
    current_turn_index: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    working_title: Mapped[str | None] = mapped_column(String(255), nullable=True)
    # JSON document for the evolving story; a fresh empty dict is the default.
    story_state: Mapped[dict] = mapped_column(JSON, default=dict)
    latest_user_transcript: Mapped[str | None] = mapped_column(Text, nullable=True)
    latest_assistant_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    last_error: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Timestamps default to the database's now(); updated_at also has onupdate=now().
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), index=True
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )
 class VoiceTurn(Base):
    """One turn of user input and assistant response within a voice session."""
    __tablename__ = "voice_turns"
    # Each (session_id, turn_index) pair may appear only once.
    __table_args__ = (
        UniqueConstraint("session_id", "turn_index", name="uq_voice_turn_session_turn_index"),
    )
    # Primary key; populated through the module's _uuid default when not supplied.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=_uuid)
    # Parent session; turns are deleted with it (ON DELETE CASCADE).
    session_id: Mapped[str] = mapped_column(
        String(36), ForeignKey("voice_sessions.id", ondelete="CASCADE"), nullable=False, index=True
    )
    turn_index: Mapped[int] = mapped_column(Integer, nullable=False)
    status: Mapped[str] = mapped_column(String(32), nullable=False, default="received", index=True)
    # User-side input: stored audio reference plus its transcript.
    user_audio_path: Mapped[str | None] = mapped_column(String(500), nullable=True)
    user_audio_mime_type: Mapped[str | None] = mapped_column(String(100), nullable=True)
    user_audio_duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
    user_transcript: Mapped[str | None] = mapped_column(Text, nullable=True)
    transcript_confidence: Mapped[float | None] = mapped_column(Float, nullable=True)
    # Intent label and confidence recorded for this turn; "unknown" until one is set.
    detected_intent: Mapped[str] = mapped_column(String(32), nullable=False, default="unknown")
    intent_confidence: Mapped[float | None] = mapped_column(Float, nullable=True)
    # JSON patch describing this turn's change to the story; defaults to an empty dict.
    story_patch: Mapped[dict] = mapped_column(JSON, default=dict)
    # Assistant-side output: reply text and stored audio reference.
    assistant_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    assistant_audio_path: Mapped[str | None] = mapped_column(String(500), nullable=True)
    assistant_audio_duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Timestamps default to the database's now(); updated_at also has onupdate=now().
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), index=True
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )
 class VoiceSessionEvent(Base):
    """Append-only event emitted by one voice co-creation session."""
    __tablename__ = "voice_session_events"
    # Auto-incrementing integer key; the detail service orders events by it.
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Parent session; events are deleted with it (ON DELETE CASCADE).
    session_id: Mapped[str] = mapped_column(
        String(36), ForeignKey("voice_sessions.id", ondelete="CASCADE"), nullable=False, index=True
    )
    # Optional turn reference; reset to NULL if the turn row is deleted.
    turn_id: Mapped[str | None] = mapped_column(
        String(36), ForeignKey("voice_turns.id", ondelete="SET NULL"), nullable=True, index=True
    )
    event_type: Mapped[str] = mapped_column(String(64), nullable=False)
    status: Mapped[str] = mapped_column(String(32), nullable=False)
    message: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Free-form JSON details for the event; defaults to an empty dict.
    event_metadata: Mapped[dict] = mapped_column(JSON, default=dict)
    # There is no updated_at column on this table, only the creation time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), index=True
    )
 class ChildProfile(Base):
    """Child profile entity."""
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -11,6 +11,7 @@ from app.api import (
    reading_events,
    stories,
    universes,
    voice_sessions,
 )
 from app.core.config import settings
 from app.core.logging import get_logger, setup_logging
@@ -67,6 +68,7 @@ app.add_middleware(
 app.include_router(auth.router, prefix="/auth", tags=["auth"])
 app.include_router(stories.router, prefix="/api", tags=["stories"])
 # Voice co-creation session endpoints, mounted under the shared /api prefix.
 app.include_router(voice_sessions.router, prefix="/api", tags=["voice-sessions"])
 app.include_router(profiles.router, prefix="/api", tags=["profiles"])
 app.include_router(universes.router, prefix="/api", tags=["universes"])
 app.include_router(push_configs.router, prefix="/api", tags=["push-configs"])
--- a/backend/app/schemas/voice_session_schemas.py
+++ b/backend/app/schemas/voice_session_schemas.py
@@ -0,0 +1,116 @@
 """Pydantic schemas for voice co-creation sessions."""
 from datetime import datetime
 from typing import Any, Literal
 from pydantic import BaseModel, Field
 # Maximum character length accepted for a fallback turn's transcript_text.
 MAX_VOICE_TRANSCRIPT_LENGTH = 1000
 # Maximum character length accepted for an abandon request's reason.
 MAX_VOICE_ABORT_REASON_LENGTH = 200
 # Upper bound, in milliseconds, accepted for a fallback turn's duration_ms.
 MAX_VOICE_TURN_DURATION_MS = 90_000
 class VoiceSessionCreateRequest(BaseModel):
    """Create one draft voice co-creation session."""
    # Both links are optional and default to None when omitted.
    child_profile_id: str | None = None
    universe_id: str | None = None
    # "story" is the only accepted value and also the default.
    target_mode: Literal["story"] = "story"
 class VoiceTurnCreateFallbackRequest(BaseModel):
    """Create one voice turn using text fallback instead of uploaded audio."""
    # Required text, between 1 and MAX_VOICE_TRANSCRIPT_LENGTH characters.
    transcript_text: str = Field(
        default=...,
        min_length=1,
        max_length=MAX_VOICE_TRANSCRIPT_LENGTH,
    )
    # Optional duration; when given it must lie in 1..MAX_VOICE_TURN_DURATION_MS.
    duration_ms: int | None = Field(
        None,
        ge=1,
        le=MAX_VOICE_TURN_DURATION_MS,
    )
 class VoiceSessionFinalizeRequest(BaseModel):
    """Finalize one voice session into a persisted story."""
    # Option flags passed through to the finalize service; how each flag is
    # honoured is decided there, not in this schema.
    save_story: bool = True
    generate_cover: bool = True
    generate_final_audio: bool = False
 class VoiceSessionAbandonRequest(BaseModel):
    """Explicitly abandon one in-progress session."""
    # Optional free-text reason, capped at MAX_VOICE_ABORT_REASON_LENGTH characters.
    reason: str | None = Field(
        None,
        max_length=MAX_VOICE_ABORT_REASON_LENGTH,
    )
 class VoiceSessionEventResponse(BaseModel):
    """One persisted session event."""
    # Field names mirror the columns of the VoiceSessionEvent model.
    id: int
    session_id: str
    turn_id: str | None = None
    event_type: str
    status: str
    message: str | None = None
    # A new empty dict is produced per instance when no metadata is supplied.
    event_metadata: dict[str, Any] = Field(default_factory=dict)
    created_at: datetime
 class VoiceTurnSummaryResponse(BaseModel):
    """One summarized voice session turn."""
    id: str
    session_id: str
    turn_index: int
    status: str
    user_transcript: str | None = None
    transcript_confidence: float | None = None
    detected_intent: str
    intent_confidence: float | None = None
    assistant_text: str | None = None
    # Derived by the service from whether stored assistant audio exists; the URL
    # stays None when there is nothing to download.
    assistant_audio_ready: bool = False
    assistant_audio_url: str | None = None
    error_message: str | None = None
    created_at: datetime
    updated_at: datetime
 class VoiceSessionSummaryResponse(BaseModel):
    """One summarized voice co-creation session."""
    id: str
    child_profile_id: str | None = None
    universe_id: str | None = None
    final_story_id: int | None = None
    target_mode: str
    status: str
    current_turn_index: int
    working_title: str | None = None
    # A new empty dict is produced per instance when no state is supplied.
    story_state: dict[str, Any] = Field(default_factory=dict)
    latest_user_transcript: str | None = None
    latest_assistant_text: str | None = None
    # Computed by the service from the session status (and, for can_finalize,
    # from whether any narrative segment exists); not stored columns.
    can_continue: bool = False
    can_finalize: bool = False
    last_error: str | None = None
    created_at: datetime
    updated_at: datetime
 class VoiceSessionDetailResponse(VoiceSessionSummaryResponse):
    """Detailed voice session payload with recent turns and events."""
    # Both lists default to empty; the detail service fills them in chronological
    # order with at most 10 turns and 50 events.
    recent_turns: list[VoiceTurnSummaryResponse] = Field(default_factory=list)
    events: list[VoiceSessionEventResponse] = Field(default_factory=list)
 class VoiceTurnAcceptedResponse(BaseModel):
    """Accepted response for one asynchronously processed turn."""
    # Identifiers the client can use with the turn lookup endpoint, plus the
    # turn's status at the time it was accepted.
    turn_id: str
    session_id: str
    status: str
 class VoiceSessionFinalizeResponse(BaseModel):
    """Finalize response after a session is converted into a story."""
    session_id: str
    status: str
    # Both ids are optional and default to None when the service supplies none.
    story_id: int | None = None
    generation_job_id: str | None = None
--- a/backend/app/services/voice_session_service.py
+++ b/backend/app/services/voice_session_service.py
@@ -0,0 +1,744 @@
 """Voice co-creation session service."""
 from __future__ import annotations
 from typing import Any
 from fastapi import HTTPException
 from sqlalchemy import desc, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.core.logging import get_logger
 from app.db.models import VoiceSession, VoiceSessionEvent, VoiceTurn
 from app.schemas.voice_session_schemas import (
    VoiceSessionAbandonRequest,
    VoiceSessionCreateRequest,
    VoiceSessionDetailResponse,
    VoiceSessionFinalizeRequest,
    VoiceSessionFinalizeResponse,
    VoiceSessionSummaryResponse,
    VoiceTurnAcceptedResponse,
    VoiceTurnCreateFallbackRequest,
    VoiceTurnSummaryResponse,
 )
 from app.services.adapters.text.models import StoryOutput
 from app.services.memory_service import build_enhanced_memory_context
 from app.services.provider_router import generate_story_content, text_to_speech
 from app.services.story_service import (
    create_story_from_result,
    validate_profile_and_universe,
 )
 from app.services.voice_session_storage import (
    build_turn_assistant_audio_path,
    read_session_audio,
    session_audio_exists,
    write_session_audio,
 )
 logger = get_logger(__name__)
 # Session status groups. CONTINUABLE_SESSION_STATUSES gates whether another turn
 # may be added; it is ACTIVE_SESSION_STATUSES without "processing_turn".
 # NOTE(review): ACTIVE_SESSION_STATUSES and FINAL_SESSION_STATUSES are not
 # referenced in the visible part of this module — confirm where they are used.
 ACTIVE_SESSION_STATUSES = {"draft", "active", "processing_turn", "waiting_user"}
 CONTINUABLE_SESSION_STATUSES = {"draft", "active", "waiting_user"}
 FINAL_SESSION_STATUSES = {"completed", "abandoned"}
 def _default_story_state() -> dict[str, Any]:
    return {
        "premise": None,
        "latest_direction": None,
        "cover_prompt": None,
        "narrative_segments": [],
        "safety_flags": [],
        "last_intent": None,
    }
 def _session_can_continue(session: VoiceSession) -> bool:
    """Tell whether the session's status allows another turn to be added."""
    current_status = session.status
    return current_status in CONTINUABLE_SESSION_STATUSES
 def _session_can_finalize(session: VoiceSession) -> bool:
    segments = list((session.story_state or {}).get("narrative_segments") or [])
    return bool(segments) and session.status in {"active", "waiting_user"}
 def _assistant_audio_url(session_id: str, turn_id: str, audio_path: str | None) -> str | None:
    """Build the download URL for a turn's assistant audio.

    Returns None when session_audio_exists reports no audio for audio_path.
    """
    if session_audio_exists(audio_path):
        return f"/api/voice-sessions/{session_id}/turns/{turn_id}/audio"
    return None
 def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
    """Convert a VoiceTurn row into its API summary.

    Args:
        turn: The persisted turn to summarize.

    Returns:
        A VoiceTurnSummaryResponse whose audio fields reflect whether stored
        assistant audio currently exists for the turn.
    """
    # Check storage once and derive both audio fields from that single answer.
    # Checking twice costs an extra storage lookup and can produce a response
    # that says "ready" but carries no URL if the file changes in between.
    audio_url = _assistant_audio_url(
        turn.session_id,
        turn.id,
        turn.assistant_audio_path,
    )
    return VoiceTurnSummaryResponse(
        id=turn.id,
        session_id=turn.session_id,
        turn_index=turn.turn_index,
        status=turn.status,
        user_transcript=turn.user_transcript,
        transcript_confidence=turn.transcript_confidence,
        detected_intent=turn.detected_intent,
        intent_confidence=turn.intent_confidence,
        assistant_text=turn.assistant_text,
        # The URL helper returns None exactly when no audio exists.
        assistant_audio_ready=audio_url is not None,
        assistant_audio_url=audio_url,
        error_message=turn.error_message,
        created_at=turn.created_at,
        updated_at=turn.updated_at,
    )
 def _session_to_summary(session: VoiceSession) -> VoiceSessionSummaryResponse:
    """Convert a VoiceSession row into its API summary, including capability flags."""
    # Stored columns copied straight across.
    summary_fields = {
        "id": session.id,
        "child_profile_id": session.child_profile_id,
        "universe_id": session.universe_id,
        "final_story_id": session.final_story_id,
        "target_mode": session.target_mode,
        "status": session.status,
        "current_turn_index": session.current_turn_index,
        "working_title": session.working_title,
        # A missing story state is reported as an empty document.
        "story_state": session.story_state or {},
        "latest_user_transcript": session.latest_user_transcript,
        "latest_assistant_text": session.latest_assistant_text,
        "last_error": session.last_error,
        "created_at": session.created_at,
        "updated_at": session.updated_at,
    }
    # Derived flags telling the client which actions are currently allowed.
    summary_fields["can_continue"] = _session_can_continue(session)
    summary_fields["can_finalize"] = _session_can_finalize(session)
    return VoiceSessionSummaryResponse(**summary_fields)
 async def _record_session_event(
    db: AsyncSession,
    *,
    session_id: str,
    turn_id: str | None,
    event_type: str,
    status: str,
    message: str | None = None,
    metadata: dict[str, Any] | None = None,
 ) -> VoiceSessionEvent:
    """Persist one session event and return the refreshed row.

    The event is committed immediately, so anything else pending on ``db``
    is committed with it.
    """
    event_fields = {
        "session_id": session_id,
        "turn_id": turn_id,
        "event_type": event_type,
        "status": status,
        "message": message,
        # Missing or empty metadata is stored as an empty dict.
        "event_metadata": metadata if metadata else {},
    }
    recorded = VoiceSessionEvent(**event_fields)
    db.add(recorded)
    await db.commit()
    # Reload the row after the commit before handing it back.
    await db.refresh(recorded)
    return recorded
 async def _get_owned_session(
    db: AsyncSession,
    *,
    session_id: str,
    user_id: str,
 ) -> VoiceSession:
    """Load a session by id, restricted to the given owner.

    Raises:
        HTTPException: 404 when no session matches both the id and the user.
    """
    # Filtering on user_id means another user's session looks the same as a missing one.
    owned_query = select(VoiceSession).where(
        VoiceSession.id == session_id,
        VoiceSession.user_id == user_id,
    )
    found = (await db.execute(owned_query)).scalar_one_or_none()
    if found:
        return found
    raise HTTPException(status_code=404, detail="Voice session not found")
 async def _get_owned_turn(
    db: AsyncSession,
    *,
    session_id: str,
    turn_id: str,
    user_id: str,
 ) -> VoiceTurn:
    """Load a turn by id within a session, restricted to the given owner.

    Raises:
        HTTPException: 404 when no turn matches the turn id, session id and user.
    """
    # Ownership lives on the session, so join through it to check the user.
    owned_query = select(VoiceTurn).join(
        VoiceSession,
        VoiceTurn.session_id == VoiceSession.id,
    )
    owned_query = owned_query.where(
        VoiceTurn.id == turn_id,
        VoiceTurn.session_id == session_id,
        VoiceSession.user_id == user_id,
    )
    found = (await db.execute(owned_query)).scalar_one_or_none()
    if found:
        return found
    raise HTTPException(status_code=404, detail="Voice turn not found")
 def _detect_intent(
    transcript_text: str,
    *,
    current_turn_index: int,
 ) -> tuple[str, float]:
    normalized = transcript_text.replace(" ", "")
    if any(keyword in normalized for keyword in ("保存故事", "存起来", "保存吧", "保存到故事库")):
        return "save_story", 0.95
    if any(keyword in normalized for keyword in ("先到这里", "讲完了", "结束吧", "停在这里")):
        return "end_story", 0.88
    if current_turn_index == 0:
        return "start_story", 0.82
    if any(
        keyword in normalized
        for keyword in (
            "不要",
            "改成",
            "换成",
            "我想让",
            "让它",
            "改一下",
            "改一改",
            "其实",
        )
    ):
        return "correct_story", 0.76
    return "continue_story", 0.68
 def _recent_story_text(session: VoiceSession) -> str:
    story_state = session.story_state or {}
    segments = list(story_state.get("narrative_segments") or [])
    if not segments:
        return ""
    return "\n\n".join(segments[-2:])
 def _build_generation_prompt(
    *,
    session: VoiceSession,
    transcript_text: str,
    intent: str,
 ) -> str:
    """Assemble the Chinese-language prompt sent to the story generator.

    "start_story" asks for an opening, "correct_story" asks to absorb the
    child's change, and every other intent asks for a plain continuation.
    The latter two embed the most recent story text as context.
    """
    story_so_far = _recent_story_text(session)
    # Every prompt opens with the same role statement.
    role_line = "你是 DreamWeaver 的儿童故事共创助手。"

    if intent == "start_story":
        opening_parts = [
            role_line,
            "请为 3-8 岁儿童写一个温暖、安全、适合继续接龙的故事开头。",
            f"孩子刚刚说：{transcript_text}。",
            "请只输出一小段自然的中文故事，不要分点，不要解释，不要写“故事开始”。",
        ]
        return "".join(opening_parts)

    # Shared context sentence; a placeholder stands in when nothing has been told yet.
    context_line = f"当前故事最近两段如下：{story_so_far or '（暂时还没有已讲述内容）'}。"

    if intent == "correct_story":
        correction_parts = [
            role_line,
            context_line,
            f"孩子希望修正故事走向：{transcript_text}。",
            "请顺着已有内容自然接住这个修改，继续写一小段新故事。",
            "不要从头重讲，不要解释规则。",
        ]
        return "".join(correction_parts)

    continuation_parts = [
        role_line,
        context_line,
        f"孩子接着说：{transcript_text}。",
        "请继续写一小段新的儿童故事内容，让故事自然往下发展。",
        "不要分点，不要做旁白说明。",
    ]
    return "".join(continuation_parts)
 async def _generate_assistant_turn(
    db: AsyncSession,
    *,
    session: VoiceSession,
    transcript_text: str,
    intent: str,
 ) -> StoryOutput:
    """Generate the assistant's next story segment for one turn.

    Memory context is built from the session's child profile and universe,
    then the intent-specific prompt is sent to the story generator.
    """
    memory_context = await build_enhanced_memory_context(
        session.child_profile_id,
        session.universe_id,
        db,
    )
    generation_request = {
        "input_type": "full_story",
        "data": _build_generation_prompt(
            session=session,
            transcript_text=transcript_text,
            intent=intent,
        ),
        "memory_context": memory_context,
        "db": db,
        "user_id": session.user_id,
    }
    story_output = await generate_story_content(**generation_request)
    return story_output
 def _merge_story_state(
    session: VoiceSession,
    *,
    transcript_text: str,
    intent: str,
    assistant_result: StoryOutput | None,
 ) -> tuple[dict[str, Any], dict[str, Any]]:
    """Fold one turn into the session's story state.

    Args:
        session: Session whose current ``story_state`` is the starting point;
            it is not mutated here.
        transcript_text: The user's transcript for this turn.
        intent: Intent label detected for this turn.
        assistant_result: Generator output for the turn, or None when nothing
            was generated.

    Returns:
        ``(new_state, patch)``: the merged story state, and a summary of what
        this turn changed.
    """
    # Start from the defaults so keys missing from older state documents are present.
    current_state = _default_story_state() | (session.story_state or {})
    narrative_segments = list(current_state.get("narrative_segments") or [])
    # The first utterance becomes the premise, unless one is already recorded.
    if intent == "start_story" and not current_state.get("premise"):
        current_state["premise"] = transcript_text
    # Strip before testing: whitespace-only text is truthy but would add an empty
    # segment, which in turn makes a session look ready to finalize.
    new_segment = ((assistant_result.story_text if assistant_result else None) or "").strip()
    if new_segment:
        narrative_segments.append(new_segment)
    current_state["narrative_segments"] = narrative_segments
    current_state["latest_direction"] = transcript_text
    current_state["last_intent"] = intent
    if assistant_result and assistant_result.cover_prompt_suggestion:
        current_state["cover_prompt"] = assistant_result.cover_prompt_suggestion
    # Keep the existing working title when the assistant gave no usable one.
    proposed_title = assistant_result.title if assistant_result else None
    patch = {
        "intent": intent,
        "transcript_text": transcript_text,
        "segment_added": bool(new_segment),
        "working_title": proposed_title or session.working_title,
        "cover_prompt": current_state.get("cover_prompt"),
        "narrative_segments_count": len(narrative_segments),
    }
    return current_state, patch
 async def create_voice_session_service(
    request: VoiceSessionCreateRequest,
    user_id: str,
    db: AsyncSession,
 ) -> VoiceSessionSummaryResponse:
    """Create a draft voice session for the user and log its creation event.

    The requested profile and universe are validated for this user first; the
    validated ids are what get stored on the session.
    """
    validated_profile_id, validated_universe_id = await validate_profile_and_universe(
        request.child_profile_id,
        request.universe_id,
        user_id,
        db,
    )

    new_session = VoiceSession(
        user_id=user_id,
        child_profile_id=validated_profile_id,
        universe_id=validated_universe_id,
        target_mode=request.target_mode,
        status="draft",
        story_state=_default_story_state(),
    )
    db.add(new_session)
    await db.commit()
    await db.refresh(new_session)

    creation_details = {
        "child_profile_id": new_session.child_profile_id,
        "universe_id": new_session.universe_id,
        "target_mode": new_session.target_mode,
    }
    await _record_session_event(
        db,
        session_id=new_session.id,
        turn_id=None,
        event_type="session_created",
        status="succeeded",
        message="Voice co-creation session created.",
        metadata=creation_details,
    )
    # Recording the event commits again, so reload the session before reading it.
    await db.refresh(new_session)
    return _session_to_summary(new_session)
 async def get_voice_session_detail_service(
    session_id: str,
    user_id: str,
    db: AsyncSession,
 ) -> VoiceSessionDetailResponse:
    """Return the session summary together with its latest turns and events.

    At most the 10 highest-indexed turns and the 50 highest-id events are
    loaded; both lists are handed back oldest-first.
    """

    def _event_payload(record) -> dict:
        # Plain-dict view of one event row for the response model.
        return {
            "id": record.id,
            "session_id": record.session_id,
            "turn_id": record.turn_id,
            "event_type": record.event_type,
            "status": record.status,
            "message": record.message,
            "event_metadata": record.event_metadata or {},
            "created_at": record.created_at,
        }

    session = await _get_owned_session(db, session_id=session_id, user_id=user_id)

    # Rows are selected newest-first so LIMIT keeps the most recent ones,
    # then flipped back into chronological order.
    latest_turns_stmt = (
        select(VoiceTurn)
        .where(VoiceTurn.session_id == session.id)
        .order_by(desc(VoiceTurn.turn_index))
        .limit(10)
    )
    turn_result = await db.execute(latest_turns_stmt)
    chronological_turns = list(turn_result.scalars().all())
    chronological_turns.reverse()

    latest_events_stmt = (
        select(VoiceSessionEvent)
        .where(VoiceSessionEvent.session_id == session.id)
        .order_by(desc(VoiceSessionEvent.id))
        .limit(50)
    )
    event_result = await db.execute(latest_events_stmt)
    chronological_events = list(event_result.scalars().all())
    chronological_events.reverse()

    summary = _session_to_summary(session)
    return VoiceSessionDetailResponse(
        **summary.model_dump(),
        recent_turns=[_turn_to_summary(item) for item in chronological_turns],
        events=[_event_payload(item) for item in chronological_events],
    )
 async def create_voice_turn_from_text_service(
    session_id: str,
    request: VoiceTurnCreateFallbackRequest,
    user_id: str,
    db: AsyncSession,
 ) -> VoiceTurnAcceptedResponse:
    """Process one typed ("fallback") turn of a voice co-creation session.

    The whole turn is handled inline before the response is returned:

    1. Load the session owned by ``user_id`` and reject the turn unless the
       session status is in ``CONTINUABLE_SESSION_STATUSES``.
    2. Persist a new ``VoiceTurn`` (status ``"transcribing"``) and move the
       session to ``"processing_turn"``.
    3. Resolve the assistant text: canned replies for ``save_story`` /
       ``end_story``, otherwise ``_generate_assistant_turn``. The result is
       merged into ``session.story_state``; the turn becomes
       ``"narrative_ready"`` and the session ``"waiting_user"``.
    4. If there is assistant text, synthesize audio with ``text_to_speech``
       and store it; the turn becomes ``"audio_ready"``. An audio failure is
       not fatal: the turn stays ``"narrative_ready"``.

    Progress is written to the session event log at each step.

    Args:
        session_id: Id of the voice session receiving the turn.
        request: Fallback payload; ``transcript_text`` and ``duration_ms`` are read.
        user_id: Owner used for the session lookup and passed to ``text_to_speech``.
        db: Async database session used for all reads, commits and events.

    Returns:
        ``VoiceTurnAcceptedResponse`` carrying the turn id, session id and the
        turn's final status (``"failed"``, ``"narrative_ready"`` or
        ``"audio_ready"``).

    Raises:
        HTTPException: 409 when the session status does not allow another turn.
    """
    session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
    if session.status not in CONTINUABLE_SESSION_STATUSES:
        raise HTTPException(
            status_code=409,
            detail="Voice session is not ready for another turn.",
        )
    transcript_text = request.transcript_text.strip()
    next_turn_index = session.current_turn_index + 1
    # Intent detection sees the index of the previous turn: the session counter
    # is only advanced further below.
    detected_intent, intent_confidence = _detect_intent(
        transcript_text,
        current_turn_index=session.current_turn_index,
    )
    # Typed input needs no speech recognition, so the transcript confidence is
    # fixed at 1.0.
    turn = VoiceTurn(
        session_id=session.id,
        turn_index=next_turn_index,
        status="transcribing",
        user_audio_duration_ms=request.duration_ms,
        user_transcript=transcript_text,
        transcript_confidence=1.0,
        detected_intent=detected_intent,
        intent_confidence=intent_confidence,
    )
    # Mark the session busy and clear any error left over from an earlier turn.
    session.status = "processing_turn"
    session.current_turn_index = next_turn_index
    session.latest_user_transcript = transcript_text
    session.last_error = None
    db.add(turn)
    await db.commit()
    await db.refresh(session)
    await db.refresh(turn)
    await _record_session_event(
        db,
        session_id=session.id,
        turn_id=turn.id,
        event_type="turn_received",
        status="received",
        message="Voice turn fallback text received.",
        metadata={"turn_index": turn.turn_index},
    )
    await _record_session_event(
        db,
        session_id=session.id,
        turn_id=turn.id,
        event_type="turn_transcribed",
        status="succeeded",
        message="Fallback transcript accepted.",
        metadata={"transcript_confidence": turn.transcript_confidence},
    )
    assistant_text: str | None = None
    assistant_result: StoryOutput | None = None
    # --- Narrative phase: any exception raised inside this block (generation,
    # state merge, commit or event recording) marks the turn as failed.
    try:
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="intent_resolved",
            status="succeeded",
            message="Turn intent resolved.",
            metadata={
                "detected_intent": detected_intent,
                "intent_confidence": intent_confidence,
            },
        )
        # save_story / end_story get a fixed reply and skip generation, so
        # assistant_result stays None for them.
        if detected_intent == "save_story":
            assistant_text = "好的，这个故事已经准备好保存到故事库了。"
        elif detected_intent == "end_story":
            assistant_text = "好的，我们先把故事停在这里。想保存的话，现在就可以保存到故事库。"
        else:
            assistant_result = await _generate_assistant_turn(
                db,
                session=session,
                transcript_text=transcript_text,
                intent=detected_intent,
            )
            assistant_text = assistant_result.story_text.strip()
        merged_state, story_patch = _merge_story_state(
            session,
            transcript_text=transcript_text,
            intent=detected_intent,
            assistant_result=assistant_result,
        )
        turn.story_patch = story_patch
        turn.assistant_text = assistant_text
        turn.status = "narrative_ready"
        session.story_state = merged_state
        session.latest_assistant_text = assistant_text
        session.status = "waiting_user"
        # The first generated title wins; later turns never overwrite it.
        if assistant_result and assistant_result.title and not session.working_title:
            session.working_title = assistant_result.title
        await db.commit()
        await db.refresh(session)
        await db.refresh(turn)
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="story_patch_applied",
            status="succeeded",
            message="Story state updated after one turn.",
            metadata=story_patch,
        )
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="assistant_text_ready",
            status="succeeded",
            message="Assistant text response generated.",
            metadata={
                "assistant_text_length": len(assistant_text or ""),
                "working_title": session.working_title,
            },
        )
    except Exception as exc:
        # Only the turn is marked failed; the session goes back to
        # "waiting_user" with the error text stored in last_error.
        turn.status = "failed"
        turn.error_message = str(exc)
        session.status = "waiting_user"
        session.last_error = str(exc)
        await db.commit()
        await db.refresh(session)
        await db.refresh(turn)
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="session_failed",
            status="failed",
            message="Assistant narrative generation failed for one voice turn.",
            metadata={"error": str(exc), "turn_index": turn.turn_index},
        )
        logger.warning(
            "voice_turn_generation_failed",
            session_id=session.id,
            turn_id=turn.id,
            error=str(exc),
        )
        # The failure is reported through the turn status, not as an HTTP error.
        return VoiceTurnAcceptedResponse(
            turn_id=turn.id,
            session_id=session.id,
            status=turn.status,
        )
    # --- Audio phase: best effort, the text reply is kept even if it fails.
    if assistant_text:
        try:
            audio_bytes = await text_to_speech(
                assistant_text,
                db=db,
                user_id=user_id,
            )
            # NOTE(review): write_session_audio is a plain (non-async) helper,
            # so this call appears to do file I/O on the event loop - confirm
            # that is acceptable for the expected audio sizes.
            saved_path = write_session_audio(
                build_turn_assistant_audio_path(session.id, turn.turn_index),
                audio_bytes,
            )
            turn.assistant_audio_path = saved_path
            # The duration of the synthesized audio is not measured here.
            turn.assistant_audio_duration_ms = None
            turn.status = "audio_ready"
            await db.commit()
            await db.refresh(turn)
            await _record_session_event(
                db,
                session_id=session.id,
                turn_id=turn.id,
                event_type="assistant_audio_ready",
                status="succeeded",
                message="Assistant audio response generated.",
                metadata={"audio_path": saved_path},
            )
        except Exception as exc:
            # Audio problems are recorded only in the event log and the
            # application log; the turn and session error fields stay empty.
            turn.status = "narrative_ready"
            turn.error_message = None
            session.last_error = None
            await db.commit()
            await db.refresh(turn)
            await db.refresh(session)
            await _record_session_event(
                db,
                session_id=session.id,
                turn_id=turn.id,
                event_type="assistant_audio_failed",
                status="failed",
                message="Assistant audio generation failed, text response kept.",
                metadata={"error": str(exc)},
            )
            logger.warning(
                "voice_turn_audio_failed",
                session_id=session.id,
                turn_id=turn.id,
                error=str(exc),
            )
    return VoiceTurnAcceptedResponse(
        turn_id=turn.id,
        session_id=session.id,
        status=turn.status,
    )
 async def get_voice_turn_service(
    session_id: str,
    turn_id: str,
    user_id: str,
    db: AsyncSession,
 ) -> VoiceTurnSummaryResponse:
    """Look up one turn through ``_get_owned_turn`` and return its summary."""
    return _turn_to_summary(
        await _get_owned_turn(
            db,
            session_id=session_id,
            turn_id=turn_id,
            user_id=user_id,
        )
    )
 async def get_voice_turn_audio_service(
    session_id: str,
    turn_id: str,
    user_id: str,
    db: AsyncSession,
 ) -> bytes:
    """Return the stored assistant audio bytes for one turn.

    Raises:
        HTTPException: 404 when the turn has no audio path or the file is
            not present on disk.
    """
    owned_turn = await _get_owned_turn(
        db,
        session_id=session_id,
        turn_id=turn_id,
        user_id=user_id,
    )
    stored_audio_path = owned_turn.assistant_audio_path
    if session_audio_exists(stored_audio_path):
        return read_session_audio(stored_audio_path)
    raise HTTPException(status_code=404, detail="Voice turn audio not found")
 async def finalize_voice_session_service(
    session_id: str,
    request: VoiceSessionFinalizeRequest,
    user_id: str,
    db: AsyncSession,
 ) -> VoiceSessionFinalizeResponse:
    """Finalize a voice session by saving its narrative as a story.

    The narrative segments stored in ``session.story_state`` are stripped,
    joined with blank lines and handed to ``create_story_from_result``. On
    success the session is linked to the new story and marked ``"completed"``.

    All validation runs before the session is moved to ``"finalizing_story"``,
    and a failure while creating the story restores the previous status, so a
    rejected or failed finalize never leaves the session stuck in the
    transitional state.

    Args:
        session_id: Id of the voice session to finalize.
        request: Finalize options; ``save_story`` must be true, and
            ``generate_cover`` decides whether the stored cover prompt is
            forwarded to the story.
        user_id: Owner used for the session lookup and the created story.
        db: Async database session.

    Returns:
        ``VoiceSessionFinalizeResponse`` with the session id, its final status
        and the id of the created story (``generation_job_id`` is ``None``).

    Raises:
        HTTPException: 400 when ``save_story`` is false; 409 when the session
            is already closed, not ready to finalize, or has no narrative.
        Exception: Anything raised while creating the story is re-raised
            after the session status has been restored.
    """
    if not request.save_story:
        raise HTTPException(
            status_code=400,
            detail="Voice session finalize requires save_story=true in Phase A.",
        )
    session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
    if session.status in FINAL_SESSION_STATUSES:
        raise HTTPException(status_code=409, detail="Voice session is already closed.")
    if not _session_can_finalize(session):
        raise HTTPException(status_code=409, detail="Voice session is not ready to finalize.")

    # Build and validate the story text BEFORE touching the status: raising
    # this 409 after the "finalizing_story" commit would strand the session.
    story_state = session.story_state or {}
    final_story_text = "\n\n".join(
        segment.strip()
        for segment in (story_state.get("narrative_segments") or [])
        if segment.strip()
    )
    if not final_story_text:
        raise HTTPException(status_code=409, detail="Voice session has no narrative to save.")

    previous_status = session.status
    session.status = "finalizing_story"
    await db.commit()
    await db.refresh(session)
    await _record_session_event(
        db,
        session_id=session.id,
        turn_id=None,
        event_type="session_finalizing",
        status="running",
        message="Voice session is being finalized into a story.",
        metadata={"generate_cover": request.generate_cover},
    )

    try:
        story_result = StoryOutput(
            mode="generated",
            title=session.working_title or "一起编织的睡前故事",
            story_text=final_story_text,
            # An empty prompt is passed when no cover was requested.
            cover_prompt_suggestion=(
                (story_state.get("cover_prompt") or "") if request.generate_cover else ""
            ),
        )
        story = await create_story_from_result(
            result=story_result,
            user_id=user_id,
            profile_id=session.child_profile_id,
            universe_id=session.universe_id,
            db=db,
        )
    except Exception as exc:
        # Drop whatever the failed story creation left pending, reload the
        # session row, and put it back into the state it had before finalize
        # started so the user can retry or keep going.
        await db.rollback()
        await db.refresh(session)
        session.status = previous_status
        session.last_error = str(exc)
        await db.commit()
        await db.refresh(session)
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=None,
            event_type="session_failed",
            status="failed",
            message="Voice session could not be finalized into a story.",
            metadata={"error": str(exc)},
        )
        logger.warning(
            "voice_session_finalize_failed",
            session_id=session.id,
            error=str(exc),
        )
        raise

    session.final_story_id = story.id
    session.status = "completed"
    session.last_error = None
    await db.commit()
    await db.refresh(session)
    await _record_session_event(
        db,
        session_id=session.id,
        turn_id=None,
        event_type="session_saved_as_story",
        status="succeeded",
        message="Voice session finalized into a story.",
        metadata={"story_id": story.id},
    )
    return VoiceSessionFinalizeResponse(
        session_id=session.id,
        status=session.status,
        story_id=story.id,
        generation_job_id=None,
    )
 async def abandon_voice_session_service(
    session_id: str,
    request: VoiceSessionAbandonRequest,
    user_id: str,
    db: AsyncSession,
 ) -> VoiceSessionSummaryResponse:
    """Mark an open voice session as abandoned and return its summary.

    The reason supplied by the caller is stored in ``last_error`` and echoed
    in the ``session_abandoned`` event metadata.

    Raises:
        HTTPException: 409 when the session status is already final.
    """
    session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
    already_closed = session.status in FINAL_SESSION_STATUSES
    if already_closed:
        raise HTTPException(status_code=409, detail="Voice session is already closed.")

    abandon_reason = request.reason
    session.status = "abandoned"
    session.last_error = abandon_reason
    await db.commit()
    await db.refresh(session)

    await _record_session_event(
        db,
        session_id=session.id,
        turn_id=None,
        event_type="session_abandoned",
        status="succeeded",
        message="Voice session abandoned by the user.",
        metadata={"reason": abandon_reason},
    )

    await db.refresh(session)
    return _session_to_summary(session)
--- a/backend/app/services/voice_session_storage.py
+++ b/backend/app/services/voice_session_storage.py
@@ -0,0 +1,48 @@
 """Voice co-creation session storage helpers."""
 from __future__ import annotations
 from pathlib import Path
 from app.core.config import settings
 def session_storage_dir(session_id: str) -> Path:
    """Return the per-session directory under the configured storage root."""
    storage_root = Path(settings.voice_session_storage_dir)
    return storage_root.joinpath(session_id)
 def build_turn_user_audio_path(session_id: str, turn_index: int, suffix: str) -> Path:
    """Build the storage path for the user's uploaded audio of one turn.

    Leading dots are removed from ``suffix``; when nothing is left the
    extension falls back to ``webm``.
    """
    extension = suffix.lstrip(".")
    if not extension:
        extension = "webm"
    file_name = f"turn-{turn_index:03d}-user.{extension}"
    return session_storage_dir(session_id) / file_name
 def build_turn_assistant_audio_path(session_id: str, turn_index: int) -> Path:
    """Build the storage path for the generated assistant audio of one turn."""
    file_name = f"turn-{turn_index:03d}-assistant.mp3"
    return session_storage_dir(session_id).joinpath(file_name)
 def write_session_audio(path: Path, audio_data: bytes) -> str:
    """Persist session audio bytes and return the saved path as a string.

    The bytes are first written to a sibling ``<name>.tmp`` file which is
    then renamed over ``path``, so readers never observe a half-written
    file. Missing parent directories are created. If writing or renaming
    fails, the temporary file is removed (best effort) before the error is
    re-raised, so failed writes do not leave ``.tmp`` files behind.

    Raises:
        OSError: When the temporary file cannot be written or moved into place.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    temp_path = path.with_suffix(f"{path.suffix}.tmp")
    try:
        temp_path.write_bytes(audio_data)
        temp_path.replace(path)
    except OSError:
        # Best-effort cleanup: the temp file may not exist (write failed
        # early) or may be undeletable; neither should mask the real error.
        try:
            temp_path.unlink()
        except OSError:
            pass
        raise
    return str(path)
 def read_session_audio(audio_path: str) -> bytes:
    """Return the raw bytes stored at ``audio_path``."""
    with Path(audio_path).open("rb") as audio_file:
        return audio_file.read()
 def session_audio_exists(audio_path: str | None) -> bool:
    """Whether one stored session audio file currently exists."""
    return bool(audio_path) and Path(audio_path).is_file()
--- a/backend/tests/conftest.py
+++ b/backend/tests/conftest.py
@@ -187,6 +187,18 @@ def isolated_story_audio_cache(tmp_path, monkeypatch):
    yield
@pytest.fixture(autouse=True)
 def isolated_voice_session_storage(tmp_path, monkeypatch):
    """Use an isolated directory for voice session assets."""
    monkeypatch.setattr(
        settings,
        "voice_session_storage_dir",
        str(tmp_path / "voice_sessions"),
    )
    yield
@pytest.fixture
 def mock_text_provider():
    """Mock text generation."""
--- a/backend/tests/test_voice_sessions.py
+++ b/backend/tests/test_voice_sessions.py
@@ -0,0 +1,201 @@
 from unittest.mock import AsyncMock, patch
 from httpx import ASGITransport, AsyncClient
 from app.db.database import get_db
 from app.main import app
 from app.services.adapters.text.models import StoryOutput
 async def test_voice_session_create_and_fallback_turn_returns_audio(
    db_session,
    auth_token,
 ):
    """Create a session, post one fallback turn, then fetch the turn, its audio and the detail."""

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db
    with (
        patch(
            "app.services.voice_session_service.generate_story_content",
            new_callable=AsyncMock,
        ) as mock_generate,
        patch(
            "app.services.voice_session_service.text_to_speech",
            new_callable=AsyncMock,
        ) as mock_tts,
    ):
        generated_story = StoryOutput(
            mode="generated",
            title="小猫去太空",
            story_text="小猫跳上纸飞机，朝着月亮轻轻挥手。",
            cover_prompt_suggestion="温暖儿童绘本封面，小猫与月亮",
        )
        mock_generate.return_value = generated_story
        mock_tts.return_value = b"fake-turn-audio"
        transport = ASGITransport(app=app)
        try:
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                client.cookies.set("access_token", auth_token)

                # 1. Create the session.
                created = await client.post("/api/voice-sessions", json={})
                assert created.status_code == 201
                session_id = created.json()["id"]
                session_url = f"/api/voice-sessions/{session_id}"

                # 2. Submit one typed turn.
                accepted = await client.post(
                    f"{session_url}/turns/fallback",
                    json={"transcript_text": "我想听一个小猫去太空的故事"},
                )
                assert accepted.status_code == 202
                turn_id = accepted.json()["turn_id"]

                # 3. The turn should have text, intent and audio ready.
                turn_response = await client.get(f"{session_url}/turns/{turn_id}")
                assert turn_response.status_code == 200
                turn_data = turn_response.json()
                assert turn_data["status"] == "audio_ready"
                assert turn_data["detected_intent"] == "start_story"
                assert turn_data["assistant_audio_ready"] is True
                assert turn_data["assistant_audio_url"].endswith("/audio")

                # 4. The audio endpoint serves the synthesized bytes.
                audio_response = await client.get(turn_data["assistant_audio_url"])
                assert audio_response.status_code == 200
                assert audio_response.content == b"fake-turn-audio"
                assert audio_response.headers["content-type"] == "audio/mpeg"

                # 5. The session detail reflects the processed turn.
                detail_response = await client.get(session_url)
                assert detail_response.status_code == 200
                session_data = detail_response.json()
                assert session_data["status"] == "waiting_user"
                assert session_data["working_title"] == "小猫去太空"
                assert session_data["can_continue"] is True
                assert session_data["can_finalize"] is True
                assert len(session_data["recent_turns"]) == 1
        finally:
            app.dependency_overrides.clear()
 async def test_voice_session_correct_turn_and_finalize_to_story(
    db_session,
    auth_token,
 ):
    """Two fallback turns (the second a correction) finalize into one saved story."""

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db
    with (
        patch(
            "app.services.voice_session_service.generate_story_content",
            new_callable=AsyncMock,
        ) as mock_generate,
        patch(
            "app.services.voice_session_service.text_to_speech",
            new_callable=AsyncMock,
        ) as mock_tts,
    ):
        first_segment = StoryOutput(
            mode="generated",
            title="小猫去太空",
            story_text="第一段故事：小猫坐着纸飞机飞向月亮。",
            cover_prompt_suggestion="温暖儿童绘本封面，小猫飞向月亮",
        )
        second_segment = StoryOutput(
            mode="generated",
            title="小猫去太空",
            story_text="第二段故事：它在月亮上遇见了会发光的新朋友。",
            cover_prompt_suggestion="温暖儿童绘本封面，小猫与月亮朋友",
        )
        mock_generate.side_effect = [first_segment, second_segment]
        mock_tts.side_effect = [b"turn-1-audio", b"turn-2-audio"]
        transport = ASGITransport(app=app)
        try:
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                client.cookies.set("access_token", auth_token)

                created = await client.post("/api/voice-sessions", json={})
                assert created.status_code == 201
                session_id = created.json()["id"]
                session_url = f"/api/voice-sessions/{session_id}"
                fallback_url = f"{session_url}/turns/fallback"

                # First turn starts the story.
                first_turn = await client.post(
                    fallback_url,
                    json={"transcript_text": "我想听一个小猫去太空的故事"},
                )
                assert first_turn.status_code == 202

                # Second turn asks for a correction.
                second_turn = await client.post(
                    fallback_url,
                    json={"transcript_text": "不要让它哭了，我想让它找到一个朋友"},
                )
                assert second_turn.status_code == 202
                turn_id = second_turn.json()["turn_id"]

                turn_response = await client.get(f"{session_url}/turns/{turn_id}")
                assert turn_response.status_code == 200
                assert turn_response.json()["detected_intent"] == "correct_story"

                # Finalize into a stored story.
                finalized = await client.post(
                    f"{session_url}/finalize",
                    json={"save_story": True, "generate_cover": True},
                )
                assert finalized.status_code == 200
                finalize_data = finalized.json()
                story_id = finalize_data["story_id"]
                assert finalize_data["status"] == "completed"

                # The story contains both generated segments.
                story_response = await client.get(f"/api/stories/{story_id}")
                assert story_response.status_code == 200
                story_data = story_response.json()
                assert story_data["title"] == "小猫去太空"
                assert "第一段故事" in story_data["story_text"]
                assert "第二段故事" in story_data["story_text"]
                assert story_data["generation_status"] == "partial_ready"

                # The session is closed and linked to the story.
                detail_response = await client.get(session_url)
                assert detail_response.status_code == 200
                session_data = detail_response.json()
                assert session_data["status"] == "completed"
                assert session_data["final_story_id"] == story_id
                assert session_data["can_continue"] is False
        finally:
            app.dependency_overrides.clear()
 async def test_voice_session_abandon_blocks_future_turns(
    db_session,
    auth_token,
 ):
    """An abandoned session answers 409 to any further fallback turn."""

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db
    transport = ASGITransport(app=app)
    try:
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            client.cookies.set("access_token", auth_token)

            created = await client.post("/api/voice-sessions", json={})
            assert created.status_code == 201
            session_id = created.json()["id"]
            session_url = f"/api/voice-sessions/{session_id}"

            abandoned = await client.post(
                f"{session_url}/abandon",
                json={"reason": "孩子先去吃饭了"},
            )
            assert abandoned.status_code == 200
            assert abandoned.json()["status"] == "abandoned"

            rejected_turn = await client.post(
                f"{session_url}/turns/fallback",
                json={"transcript_text": "我们继续讲吧"},
            )
            assert rejected_turn.status_code == 409
    finally:
        app.dependency_overrides.clear()