feat: add voice co-creation session skeleton
This commit is contained in:
187
backend/alembic/versions/0013_add_voice_sessions_phase_a.py
Normal file
187
backend/alembic/versions/0013_add_voice_sessions_phase_a.py
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
"""add voice co-creation phase a tables
|
||||||
|
|
||||||
|
Revision ID: 0013_add_voice_sessions_phase_a
|
||||||
|
Revises: 0012_story_text_status
|
||||||
|
Create Date: 2026-04-19
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision = "0013_add_voice_sessions_phase_a"
|
||||||
|
down_revision = "0012_story_text_status"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
op.create_table(
|
||||||
|
"voice_sessions",
|
||||||
|
sa.Column("id", sa.String(length=36), nullable=False),
|
||||||
|
sa.Column("user_id", sa.String(length=255), nullable=False),
|
||||||
|
sa.Column("child_profile_id", sa.String(length=36), nullable=True),
|
||||||
|
sa.Column("universe_id", sa.String(length=36), nullable=True),
|
||||||
|
sa.Column("final_story_id", sa.Integer(), nullable=True),
|
||||||
|
sa.Column(
|
||||||
|
"target_mode",
|
||||||
|
sa.String(length=32),
|
||||||
|
nullable=False,
|
||||||
|
server_default="story",
|
||||||
|
),
|
||||||
|
sa.Column(
|
||||||
|
"status",
|
||||||
|
sa.String(length=32),
|
||||||
|
nullable=False,
|
||||||
|
server_default="draft",
|
||||||
|
),
|
||||||
|
sa.Column(
|
||||||
|
"current_turn_index",
|
||||||
|
sa.Integer(),
|
||||||
|
nullable=False,
|
||||||
|
server_default="0",
|
||||||
|
),
|
||||||
|
sa.Column("working_title", sa.String(length=255), nullable=True),
|
||||||
|
sa.Column("story_state", sa.JSON(), nullable=False, server_default="{}"),
|
||||||
|
sa.Column("latest_user_transcript", sa.Text(), nullable=True),
|
||||||
|
sa.Column("latest_assistant_text", sa.Text(), nullable=True),
|
||||||
|
sa.Column("last_error", sa.Text(), nullable=True),
|
||||||
|
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
|
||||||
|
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
|
||||||
|
sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
|
||||||
|
sa.ForeignKeyConstraint(
|
||||||
|
["child_profile_id"],
|
||||||
|
["child_profiles.id"],
|
||||||
|
ondelete="SET NULL",
|
||||||
|
),
|
||||||
|
sa.ForeignKeyConstraint(
|
||||||
|
["universe_id"],
|
||||||
|
["story_universes.id"],
|
||||||
|
ondelete="SET NULL",
|
||||||
|
),
|
||||||
|
sa.ForeignKeyConstraint(
|
||||||
|
["final_story_id"],
|
||||||
|
["stories.id"],
|
||||||
|
ondelete="SET NULL",
|
||||||
|
),
|
||||||
|
sa.PrimaryKeyConstraint("id"),
|
||||||
|
)
|
||||||
|
op.create_index("ix_voice_sessions_user_id", "voice_sessions", ["user_id"])
|
||||||
|
op.create_index(
|
||||||
|
"ix_voice_sessions_child_profile_id",
|
||||||
|
"voice_sessions",
|
||||||
|
["child_profile_id"],
|
||||||
|
)
|
||||||
|
op.create_index("ix_voice_sessions_universe_id", "voice_sessions", ["universe_id"])
|
||||||
|
op.create_index(
|
||||||
|
"ix_voice_sessions_final_story_id",
|
||||||
|
"voice_sessions",
|
||||||
|
["final_story_id"],
|
||||||
|
)
|
||||||
|
op.create_index("ix_voice_sessions_status", "voice_sessions", ["status"])
|
||||||
|
op.create_index("ix_voice_sessions_created_at", "voice_sessions", ["created_at"])
|
||||||
|
|
||||||
|
op.create_table(
|
||||||
|
"voice_turns",
|
||||||
|
sa.Column("id", sa.String(length=36), nullable=False),
|
||||||
|
sa.Column("session_id", sa.String(length=36), nullable=False),
|
||||||
|
sa.Column("turn_index", sa.Integer(), nullable=False),
|
||||||
|
sa.Column(
|
||||||
|
"status",
|
||||||
|
sa.String(length=32),
|
||||||
|
nullable=False,
|
||||||
|
server_default="received",
|
||||||
|
),
|
||||||
|
sa.Column("user_audio_path", sa.String(length=500), nullable=True),
|
||||||
|
sa.Column("user_audio_mime_type", sa.String(length=100), nullable=True),
|
||||||
|
sa.Column("user_audio_duration_ms", sa.Integer(), nullable=True),
|
||||||
|
sa.Column("user_transcript", sa.Text(), nullable=True),
|
||||||
|
sa.Column("transcript_confidence", sa.Float(), nullable=True),
|
||||||
|
sa.Column(
|
||||||
|
"detected_intent",
|
||||||
|
sa.String(length=32),
|
||||||
|
nullable=False,
|
||||||
|
server_default="unknown",
|
||||||
|
),
|
||||||
|
sa.Column("intent_confidence", sa.Float(), nullable=True),
|
||||||
|
sa.Column("story_patch", sa.JSON(), nullable=False, server_default="{}"),
|
||||||
|
sa.Column("assistant_text", sa.Text(), nullable=True),
|
||||||
|
sa.Column("assistant_audio_path", sa.String(length=500), nullable=True),
|
||||||
|
sa.Column("assistant_audio_duration_ms", sa.Integer(), nullable=True),
|
||||||
|
sa.Column("error_message", sa.Text(), nullable=True),
|
||||||
|
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
|
||||||
|
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
|
||||||
|
sa.ForeignKeyConstraint(["session_id"], ["voice_sessions.id"], ondelete="CASCADE"),
|
||||||
|
sa.PrimaryKeyConstraint("id"),
|
||||||
|
sa.UniqueConstraint(
|
||||||
|
"session_id",
|
||||||
|
"turn_index",
|
||||||
|
name="uq_voice_turn_session_turn_index",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
op.create_index("ix_voice_turns_session_id", "voice_turns", ["session_id"])
|
||||||
|
op.create_index("ix_voice_turns_status", "voice_turns", ["status"])
|
||||||
|
op.create_index("ix_voice_turns_created_at", "voice_turns", ["created_at"])
|
||||||
|
|
||||||
|
op.create_table(
|
||||||
|
"voice_session_events",
|
||||||
|
sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
|
||||||
|
sa.Column("session_id", sa.String(length=36), nullable=False),
|
||||||
|
sa.Column("turn_id", sa.String(length=36), nullable=True),
|
||||||
|
sa.Column("event_type", sa.String(length=64), nullable=False),
|
||||||
|
sa.Column("status", sa.String(length=32), nullable=False),
|
||||||
|
sa.Column("message", sa.Text(), nullable=True),
|
||||||
|
sa.Column("event_metadata", sa.JSON(), nullable=False, server_default="{}"),
|
||||||
|
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
|
||||||
|
sa.ForeignKeyConstraint(["session_id"], ["voice_sessions.id"], ondelete="CASCADE"),
|
||||||
|
sa.ForeignKeyConstraint(["turn_id"], ["voice_turns.id"], ondelete="SET NULL"),
|
||||||
|
sa.PrimaryKeyConstraint("id"),
|
||||||
|
)
|
||||||
|
op.create_index(
|
||||||
|
"ix_voice_session_events_session_id",
|
||||||
|
"voice_session_events",
|
||||||
|
["session_id"],
|
||||||
|
)
|
||||||
|
op.create_index(
|
||||||
|
"ix_voice_session_events_turn_id",
|
||||||
|
"voice_session_events",
|
||||||
|
["turn_id"],
|
||||||
|
)
|
||||||
|
op.create_index(
|
||||||
|
"ix_voice_session_events_created_at",
|
||||||
|
"voice_session_events",
|
||||||
|
["created_at"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
op.drop_index(
|
||||||
|
"ix_voice_session_events_created_at",
|
||||||
|
table_name="voice_session_events",
|
||||||
|
)
|
||||||
|
op.drop_index(
|
||||||
|
"ix_voice_session_events_turn_id",
|
||||||
|
table_name="voice_session_events",
|
||||||
|
)
|
||||||
|
op.drop_index(
|
||||||
|
"ix_voice_session_events_session_id",
|
||||||
|
table_name="voice_session_events",
|
||||||
|
)
|
||||||
|
op.drop_table("voice_session_events")
|
||||||
|
|
||||||
|
op.drop_index("ix_voice_turns_created_at", table_name="voice_turns")
|
||||||
|
op.drop_index("ix_voice_turns_status", table_name="voice_turns")
|
||||||
|
op.drop_index("ix_voice_turns_session_id", table_name="voice_turns")
|
||||||
|
op.drop_table("voice_turns")
|
||||||
|
|
||||||
|
op.drop_index("ix_voice_sessions_created_at", table_name="voice_sessions")
|
||||||
|
op.drop_index("ix_voice_sessions_status", table_name="voice_sessions")
|
||||||
|
op.drop_index("ix_voice_sessions_final_story_id", table_name="voice_sessions")
|
||||||
|
op.drop_index("ix_voice_sessions_universe_id", table_name="voice_sessions")
|
||||||
|
op.drop_index(
|
||||||
|
"ix_voice_sessions_child_profile_id",
|
||||||
|
table_name="voice_sessions",
|
||||||
|
)
|
||||||
|
op.drop_index("ix_voice_sessions_user_id", table_name="voice_sessions")
|
||||||
|
op.drop_table("voice_sessions")
|
||||||
137
backend/app/api/voice_sessions.py
Normal file
137
backend/app/api/voice_sessions.py
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
"""Voice co-creation session APIs."""
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, Response, status
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.core.deps import require_user
|
||||||
|
from app.core.rate_limiter import check_rate_limit
|
||||||
|
from app.db.database import get_db
|
||||||
|
from app.db.models import User
|
||||||
|
from app.schemas.voice_session_schemas import (
|
||||||
|
VoiceSessionAbandonRequest,
|
||||||
|
VoiceSessionCreateRequest,
|
||||||
|
VoiceSessionDetailResponse,
|
||||||
|
VoiceSessionFinalizeRequest,
|
||||||
|
VoiceSessionFinalizeResponse,
|
||||||
|
VoiceSessionSummaryResponse,
|
||||||
|
VoiceTurnAcceptedResponse,
|
||||||
|
VoiceTurnCreateFallbackRequest,
|
||||||
|
VoiceTurnSummaryResponse,
|
||||||
|
)
|
||||||
|
from app.services.voice_session_service import (
|
||||||
|
abandon_voice_session_service,
|
||||||
|
create_voice_session_service,
|
||||||
|
create_voice_turn_from_text_service,
|
||||||
|
finalize_voice_session_service,
|
||||||
|
get_voice_session_detail_service,
|
||||||
|
get_voice_turn_audio_service,
|
||||||
|
get_voice_turn_service,
|
||||||
|
)
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
VOICE_SESSION_RATE_LIMIT_WINDOW = 60
|
||||||
|
VOICE_SESSION_RATE_LIMIT_REQUESTS = 20
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/voice-sessions",
|
||||||
|
response_model=VoiceSessionSummaryResponse,
|
||||||
|
status_code=status.HTTP_201_CREATED,
|
||||||
|
)
|
||||||
|
async def create_voice_session(
|
||||||
|
request: VoiceSessionCreateRequest,
|
||||||
|
user: User = Depends(require_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""Create one draft voice co-creation session."""
|
||||||
|
await check_rate_limit(
|
||||||
|
f"voice-session:{user.id}",
|
||||||
|
VOICE_SESSION_RATE_LIMIT_REQUESTS,
|
||||||
|
VOICE_SESSION_RATE_LIMIT_WINDOW,
|
||||||
|
)
|
||||||
|
return await create_voice_session_service(request, user.id, db)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse)
|
||||||
|
async def get_voice_session(
|
||||||
|
session_id: str,
|
||||||
|
user: User = Depends(require_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""Get one voice co-creation session with recent turns and events."""
|
||||||
|
return await get_voice_session_detail_service(session_id, user.id, db)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/voice-sessions/{session_id}/turns/fallback",
|
||||||
|
response_model=VoiceTurnAcceptedResponse,
|
||||||
|
status_code=status.HTTP_202_ACCEPTED,
|
||||||
|
)
|
||||||
|
async def create_voice_turn_from_text(
|
||||||
|
session_id: str,
|
||||||
|
request: VoiceTurnCreateFallbackRequest,
|
||||||
|
user: User = Depends(require_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""Create one turn using text fallback before real audio upload is added."""
|
||||||
|
await check_rate_limit(
|
||||||
|
f"voice-turn:{user.id}",
|
||||||
|
VOICE_SESSION_RATE_LIMIT_REQUESTS,
|
||||||
|
VOICE_SESSION_RATE_LIMIT_WINDOW,
|
||||||
|
)
|
||||||
|
return await create_voice_turn_from_text_service(session_id, request, user.id, db)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/voice-sessions/{session_id}/turns/{turn_id}",
|
||||||
|
response_model=VoiceTurnSummaryResponse,
|
||||||
|
)
|
||||||
|
async def get_voice_turn(
|
||||||
|
session_id: str,
|
||||||
|
turn_id: str,
|
||||||
|
user: User = Depends(require_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""Get one processed turn within a voice session."""
|
||||||
|
return await get_voice_turn_service(session_id, turn_id, user.id, db)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/voice-sessions/{session_id}/turns/{turn_id}/audio")
|
||||||
|
async def get_voice_turn_audio(
|
||||||
|
session_id: str,
|
||||||
|
turn_id: str,
|
||||||
|
user: User = Depends(require_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""Get synthesized assistant audio for one completed voice turn."""
|
||||||
|
audio_bytes = await get_voice_turn_audio_service(session_id, turn_id, user.id, db)
|
||||||
|
return Response(content=audio_bytes, media_type="audio/mpeg")
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/voice-sessions/{session_id}/finalize",
|
||||||
|
response_model=VoiceSessionFinalizeResponse,
|
||||||
|
)
|
||||||
|
async def finalize_voice_session(
|
||||||
|
session_id: str,
|
||||||
|
request: VoiceSessionFinalizeRequest,
|
||||||
|
user: User = Depends(require_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""Finalize one voice session into a persisted story."""
|
||||||
|
return await finalize_voice_session_service(session_id, request, user.id, db)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/voice-sessions/{session_id}/abandon",
|
||||||
|
response_model=VoiceSessionSummaryResponse,
|
||||||
|
)
|
||||||
|
async def abandon_voice_session(
|
||||||
|
session_id: str,
|
||||||
|
request: VoiceSessionAbandonRequest,
|
||||||
|
user: User = Depends(require_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""Abandon one in-progress voice session without saving it as a story."""
|
||||||
|
return await abandon_voice_session_service(session_id, request, user.id, db)
|
||||||
@@ -66,6 +66,10 @@ class Settings(BaseSettings):
|
|||||||
"storage/audio",
|
"storage/audio",
|
||||||
description="Directory for cached story audio files",
|
description="Directory for cached story audio files",
|
||||||
)
|
)
|
||||||
|
voice_session_storage_dir: str = Field(
|
||||||
|
"storage/voice_sessions",
|
||||||
|
description="Directory for persisted voice co-creation session assets",
|
||||||
|
)
|
||||||
story_audio_cache_ttl_days: int = Field(
|
story_audio_cache_ttl_days: int = Field(
|
||||||
30,
|
30,
|
||||||
description="TTL in days before cached story audio is pruned",
|
description="TTL in days before cached story audio is pruned",
|
||||||
|
|||||||
@@ -168,6 +168,95 @@ class GenerationJobEvent(Base):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSession(Base):
|
||||||
|
"""Voice co-creation session before it is finalized as a formal story."""
|
||||||
|
|
||||||
|
__tablename__ = "voice_sessions"
|
||||||
|
|
||||||
|
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=_uuid)
|
||||||
|
user_id: Mapped[str] = mapped_column(
|
||||||
|
String(255), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
|
||||||
|
)
|
||||||
|
child_profile_id: Mapped[str | None] = mapped_column(
|
||||||
|
String(36), ForeignKey("child_profiles.id", ondelete="SET NULL"), nullable=True, index=True
|
||||||
|
)
|
||||||
|
universe_id: Mapped[str | None] = mapped_column(
|
||||||
|
String(36), ForeignKey("story_universes.id", ondelete="SET NULL"), nullable=True, index=True
|
||||||
|
)
|
||||||
|
final_story_id: Mapped[int | None] = mapped_column(
|
||||||
|
Integer, ForeignKey("stories.id", ondelete="SET NULL"), nullable=True, index=True
|
||||||
|
)
|
||||||
|
target_mode: Mapped[str] = mapped_column(String(32), nullable=False, default="story")
|
||||||
|
status: Mapped[str] = mapped_column(String(32), nullable=False, default="draft", index=True)
|
||||||
|
current_turn_index: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
|
||||||
|
working_title: Mapped[str | None] = mapped_column(String(255), nullable=True)
|
||||||
|
story_state: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||||
|
latest_user_transcript: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
|
latest_assistant_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
|
last_error: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
|
created_at: Mapped[datetime] = mapped_column(
|
||||||
|
DateTime(timezone=True), server_default=func.now(), index=True
|
||||||
|
)
|
||||||
|
updated_at: Mapped[datetime] = mapped_column(
|
||||||
|
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceTurn(Base):
|
||||||
|
"""One turn of user input and assistant response within a voice session."""
|
||||||
|
|
||||||
|
__tablename__ = "voice_turns"
|
||||||
|
__table_args__ = (
|
||||||
|
UniqueConstraint("session_id", "turn_index", name="uq_voice_turn_session_turn_index"),
|
||||||
|
)
|
||||||
|
|
||||||
|
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=_uuid)
|
||||||
|
session_id: Mapped[str] = mapped_column(
|
||||||
|
String(36), ForeignKey("voice_sessions.id", ondelete="CASCADE"), nullable=False, index=True
|
||||||
|
)
|
||||||
|
turn_index: Mapped[int] = mapped_column(Integer, nullable=False)
|
||||||
|
status: Mapped[str] = mapped_column(String(32), nullable=False, default="received", index=True)
|
||||||
|
user_audio_path: Mapped[str | None] = mapped_column(String(500), nullable=True)
|
||||||
|
user_audio_mime_type: Mapped[str | None] = mapped_column(String(100), nullable=True)
|
||||||
|
user_audio_duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||||
|
user_transcript: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
|
transcript_confidence: Mapped[float | None] = mapped_column(Float, nullable=True)
|
||||||
|
detected_intent: Mapped[str] = mapped_column(String(32), nullable=False, default="unknown")
|
||||||
|
intent_confidence: Mapped[float | None] = mapped_column(Float, nullable=True)
|
||||||
|
story_patch: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||||
|
assistant_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
|
assistant_audio_path: Mapped[str | None] = mapped_column(String(500), nullable=True)
|
||||||
|
assistant_audio_duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||||
|
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
|
created_at: Mapped[datetime] = mapped_column(
|
||||||
|
DateTime(timezone=True), server_default=func.now(), index=True
|
||||||
|
)
|
||||||
|
updated_at: Mapped[datetime] = mapped_column(
|
||||||
|
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSessionEvent(Base):
|
||||||
|
"""Append-only event emitted by one voice co-creation session."""
|
||||||
|
|
||||||
|
__tablename__ = "voice_session_events"
|
||||||
|
|
||||||
|
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
||||||
|
session_id: Mapped[str] = mapped_column(
|
||||||
|
String(36), ForeignKey("voice_sessions.id", ondelete="CASCADE"), nullable=False, index=True
|
||||||
|
)
|
||||||
|
turn_id: Mapped[str | None] = mapped_column(
|
||||||
|
String(36), ForeignKey("voice_turns.id", ondelete="SET NULL"), nullable=True, index=True
|
||||||
|
)
|
||||||
|
event_type: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||||
|
status: Mapped[str] = mapped_column(String(32), nullable=False)
|
||||||
|
message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
|
event_metadata: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||||
|
created_at: Mapped[datetime] = mapped_column(
|
||||||
|
DateTime(timezone=True), server_default=func.now(), index=True
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ChildProfile(Base):
|
class ChildProfile(Base):
|
||||||
"""Child profile entity."""
|
"""Child profile entity."""
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ from app.api import (
|
|||||||
reading_events,
|
reading_events,
|
||||||
stories,
|
stories,
|
||||||
universes,
|
universes,
|
||||||
|
voice_sessions,
|
||||||
)
|
)
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
from app.core.logging import get_logger, setup_logging
|
from app.core.logging import get_logger, setup_logging
|
||||||
@@ -67,6 +68,7 @@ app.add_middleware(
|
|||||||
|
|
||||||
app.include_router(auth.router, prefix="/auth", tags=["auth"])
|
app.include_router(auth.router, prefix="/auth", tags=["auth"])
|
||||||
app.include_router(stories.router, prefix="/api", tags=["stories"])
|
app.include_router(stories.router, prefix="/api", tags=["stories"])
|
||||||
|
app.include_router(voice_sessions.router, prefix="/api", tags=["voice-sessions"])
|
||||||
app.include_router(profiles.router, prefix="/api", tags=["profiles"])
|
app.include_router(profiles.router, prefix="/api", tags=["profiles"])
|
||||||
app.include_router(universes.router, prefix="/api", tags=["universes"])
|
app.include_router(universes.router, prefix="/api", tags=["universes"])
|
||||||
app.include_router(push_configs.router, prefix="/api", tags=["push-configs"])
|
app.include_router(push_configs.router, prefix="/api", tags=["push-configs"])
|
||||||
|
|||||||
116
backend/app/schemas/voice_session_schemas.py
Normal file
116
backend/app/schemas/voice_session_schemas.py
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
"""Pydantic schemas for voice co-creation sessions."""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Literal
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
MAX_VOICE_TRANSCRIPT_LENGTH = 1000
|
||||||
|
MAX_VOICE_ABORT_REASON_LENGTH = 200
|
||||||
|
MAX_VOICE_TURN_DURATION_MS = 90_000
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSessionCreateRequest(BaseModel):
|
||||||
|
"""Create one draft voice co-creation session."""
|
||||||
|
|
||||||
|
child_profile_id: str | None = None
|
||||||
|
universe_id: str | None = None
|
||||||
|
target_mode: Literal["story"] = Field(default="story")
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceTurnCreateFallbackRequest(BaseModel):
|
||||||
|
"""Create one voice turn using text fallback instead of uploaded audio."""
|
||||||
|
|
||||||
|
transcript_text: str = Field(..., min_length=1, max_length=MAX_VOICE_TRANSCRIPT_LENGTH)
|
||||||
|
duration_ms: int | None = Field(default=None, ge=1, le=MAX_VOICE_TURN_DURATION_MS)
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSessionFinalizeRequest(BaseModel):
|
||||||
|
"""Finalize one voice session into a persisted story."""
|
||||||
|
|
||||||
|
save_story: bool = True
|
||||||
|
generate_cover: bool = True
|
||||||
|
generate_final_audio: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSessionAbandonRequest(BaseModel):
|
||||||
|
"""Explicitly abandon one in-progress session."""
|
||||||
|
|
||||||
|
reason: str | None = Field(default=None, max_length=MAX_VOICE_ABORT_REASON_LENGTH)
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSessionEventResponse(BaseModel):
|
||||||
|
"""One persisted session event."""
|
||||||
|
|
||||||
|
id: int
|
||||||
|
session_id: str
|
||||||
|
turn_id: str | None = None
|
||||||
|
event_type: str
|
||||||
|
status: str
|
||||||
|
message: str | None = None
|
||||||
|
event_metadata: dict[str, Any] = Field(default_factory=dict)
|
||||||
|
created_at: datetime
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceTurnSummaryResponse(BaseModel):
|
||||||
|
"""One summarized voice session turn."""
|
||||||
|
|
||||||
|
id: str
|
||||||
|
session_id: str
|
||||||
|
turn_index: int
|
||||||
|
status: str
|
||||||
|
user_transcript: str | None = None
|
||||||
|
transcript_confidence: float | None = None
|
||||||
|
detected_intent: str
|
||||||
|
intent_confidence: float | None = None
|
||||||
|
assistant_text: str | None = None
|
||||||
|
assistant_audio_ready: bool = False
|
||||||
|
assistant_audio_url: str | None = None
|
||||||
|
error_message: str | None = None
|
||||||
|
created_at: datetime
|
||||||
|
updated_at: datetime
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSessionSummaryResponse(BaseModel):
|
||||||
|
"""One summarized voice co-creation session."""
|
||||||
|
|
||||||
|
id: str
|
||||||
|
child_profile_id: str | None = None
|
||||||
|
universe_id: str | None = None
|
||||||
|
final_story_id: int | None = None
|
||||||
|
target_mode: str
|
||||||
|
status: str
|
||||||
|
current_turn_index: int
|
||||||
|
working_title: str | None = None
|
||||||
|
story_state: dict[str, Any] = Field(default_factory=dict)
|
||||||
|
latest_user_transcript: str | None = None
|
||||||
|
latest_assistant_text: str | None = None
|
||||||
|
can_continue: bool = False
|
||||||
|
can_finalize: bool = False
|
||||||
|
last_error: str | None = None
|
||||||
|
created_at: datetime
|
||||||
|
updated_at: datetime
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSessionDetailResponse(VoiceSessionSummaryResponse):
|
||||||
|
"""Detailed voice session payload with recent turns and events."""
|
||||||
|
|
||||||
|
recent_turns: list[VoiceTurnSummaryResponse] = Field(default_factory=list)
|
||||||
|
events: list[VoiceSessionEventResponse] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceTurnAcceptedResponse(BaseModel):
|
||||||
|
"""Accepted response for one asynchronously processed turn."""
|
||||||
|
|
||||||
|
turn_id: str
|
||||||
|
session_id: str
|
||||||
|
status: str
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSessionFinalizeResponse(BaseModel):
|
||||||
|
"""Finalize response after a session is converted into a story."""
|
||||||
|
|
||||||
|
session_id: str
|
||||||
|
status: str
|
||||||
|
story_id: int | None = None
|
||||||
|
generation_job_id: str | None = None
|
||||||
744
backend/app/services/voice_session_service.py
Normal file
744
backend/app/services/voice_session_service.py
Normal file
@@ -0,0 +1,744 @@
|
|||||||
|
"""Voice co-creation session service."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from fastapi import HTTPException
|
||||||
|
from sqlalchemy import desc, select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.core.logging import get_logger
|
||||||
|
from app.db.models import VoiceSession, VoiceSessionEvent, VoiceTurn
|
||||||
|
from app.schemas.voice_session_schemas import (
|
||||||
|
VoiceSessionAbandonRequest,
|
||||||
|
VoiceSessionCreateRequest,
|
||||||
|
VoiceSessionDetailResponse,
|
||||||
|
VoiceSessionFinalizeRequest,
|
||||||
|
VoiceSessionFinalizeResponse,
|
||||||
|
VoiceSessionSummaryResponse,
|
||||||
|
VoiceTurnAcceptedResponse,
|
||||||
|
VoiceTurnCreateFallbackRequest,
|
||||||
|
VoiceTurnSummaryResponse,
|
||||||
|
)
|
||||||
|
from app.services.adapters.text.models import StoryOutput
|
||||||
|
from app.services.memory_service import build_enhanced_memory_context
|
||||||
|
from app.services.provider_router import generate_story_content, text_to_speech
|
||||||
|
from app.services.story_service import (
|
||||||
|
create_story_from_result,
|
||||||
|
validate_profile_and_universe,
|
||||||
|
)
|
||||||
|
from app.services.voice_session_storage import (
|
||||||
|
build_turn_assistant_audio_path,
|
||||||
|
read_session_audio,
|
||||||
|
session_audio_exists,
|
||||||
|
write_session_audio,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
ACTIVE_SESSION_STATUSES = {"draft", "active", "processing_turn", "waiting_user"}
|
||||||
|
CONTINUABLE_SESSION_STATUSES = {"draft", "active", "waiting_user"}
|
||||||
|
FINAL_SESSION_STATUSES = {"completed", "abandoned"}
|
||||||
|
|
||||||
|
|
||||||
|
def _default_story_state() -> dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"premise": None,
|
||||||
|
"latest_direction": None,
|
||||||
|
"cover_prompt": None,
|
||||||
|
"narrative_segments": [],
|
||||||
|
"safety_flags": [],
|
||||||
|
"last_intent": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _session_can_continue(session: VoiceSession) -> bool:
|
||||||
|
return session.status in CONTINUABLE_SESSION_STATUSES
|
||||||
|
|
||||||
|
|
||||||
|
def _session_can_finalize(session: VoiceSession) -> bool:
|
||||||
|
segments = list((session.story_state or {}).get("narrative_segments") or [])
|
||||||
|
return bool(segments) and session.status in {"active", "waiting_user"}
|
||||||
|
|
||||||
|
|
||||||
|
def _assistant_audio_url(session_id: str, turn_id: str, audio_path: str | None) -> str | None:
|
||||||
|
if not session_audio_exists(audio_path):
|
||||||
|
return None
|
||||||
|
return f"/api/voice-sessions/{session_id}/turns/{turn_id}/audio"
|
||||||
|
|
||||||
|
|
||||||
|
def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
|
||||||
|
return VoiceTurnSummaryResponse(
|
||||||
|
id=turn.id,
|
||||||
|
session_id=turn.session_id,
|
||||||
|
turn_index=turn.turn_index,
|
||||||
|
status=turn.status,
|
||||||
|
user_transcript=turn.user_transcript,
|
||||||
|
transcript_confidence=turn.transcript_confidence,
|
||||||
|
detected_intent=turn.detected_intent,
|
||||||
|
intent_confidence=turn.intent_confidence,
|
||||||
|
assistant_text=turn.assistant_text,
|
||||||
|
assistant_audio_ready=session_audio_exists(turn.assistant_audio_path),
|
||||||
|
assistant_audio_url=_assistant_audio_url(
|
||||||
|
turn.session_id,
|
||||||
|
turn.id,
|
||||||
|
turn.assistant_audio_path,
|
||||||
|
),
|
||||||
|
error_message=turn.error_message,
|
||||||
|
created_at=turn.created_at,
|
||||||
|
updated_at=turn.updated_at,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _session_to_summary(session: VoiceSession) -> VoiceSessionSummaryResponse:
|
||||||
|
return VoiceSessionSummaryResponse(
|
||||||
|
id=session.id,
|
||||||
|
child_profile_id=session.child_profile_id,
|
||||||
|
universe_id=session.universe_id,
|
||||||
|
final_story_id=session.final_story_id,
|
||||||
|
target_mode=session.target_mode,
|
||||||
|
status=session.status,
|
||||||
|
current_turn_index=session.current_turn_index,
|
||||||
|
working_title=session.working_title,
|
||||||
|
story_state=session.story_state or {},
|
||||||
|
latest_user_transcript=session.latest_user_transcript,
|
||||||
|
latest_assistant_text=session.latest_assistant_text,
|
||||||
|
can_continue=_session_can_continue(session),
|
||||||
|
can_finalize=_session_can_finalize(session),
|
||||||
|
last_error=session.last_error,
|
||||||
|
created_at=session.created_at,
|
||||||
|
updated_at=session.updated_at,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _record_session_event(
|
||||||
|
db: AsyncSession,
|
||||||
|
*,
|
||||||
|
session_id: str,
|
||||||
|
turn_id: str | None,
|
||||||
|
event_type: str,
|
||||||
|
status: str,
|
||||||
|
message: str | None = None,
|
||||||
|
metadata: dict[str, Any] | None = None,
|
||||||
|
) -> VoiceSessionEvent:
|
||||||
|
event = VoiceSessionEvent(
|
||||||
|
session_id=session_id,
|
||||||
|
turn_id=turn_id,
|
||||||
|
event_type=event_type,
|
||||||
|
status=status,
|
||||||
|
message=message,
|
||||||
|
event_metadata=metadata or {},
|
||||||
|
)
|
||||||
|
db.add(event)
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(event)
|
||||||
|
return event
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_owned_session(
|
||||||
|
db: AsyncSession,
|
||||||
|
*,
|
||||||
|
session_id: str,
|
||||||
|
user_id: str,
|
||||||
|
) -> VoiceSession:
|
||||||
|
result = await db.execute(
|
||||||
|
select(VoiceSession).where(
|
||||||
|
VoiceSession.id == session_id,
|
||||||
|
VoiceSession.user_id == user_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
session = result.scalar_one_or_none()
|
||||||
|
if not session:
|
||||||
|
raise HTTPException(status_code=404, detail="Voice session not found")
|
||||||
|
return session
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_owned_turn(
|
||||||
|
db: AsyncSession,
|
||||||
|
*,
|
||||||
|
session_id: str,
|
||||||
|
turn_id: str,
|
||||||
|
user_id: str,
|
||||||
|
) -> VoiceTurn:
|
||||||
|
result = await db.execute(
|
||||||
|
select(VoiceTurn)
|
||||||
|
.join(VoiceSession, VoiceTurn.session_id == VoiceSession.id)
|
||||||
|
.where(
|
||||||
|
VoiceTurn.id == turn_id,
|
||||||
|
VoiceTurn.session_id == session_id,
|
||||||
|
VoiceSession.user_id == user_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
turn = result.scalar_one_or_none()
|
||||||
|
if not turn:
|
||||||
|
raise HTTPException(status_code=404, detail="Voice turn not found")
|
||||||
|
return turn
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_intent(
|
||||||
|
transcript_text: str,
|
||||||
|
*,
|
||||||
|
current_turn_index: int,
|
||||||
|
) -> tuple[str, float]:
|
||||||
|
normalized = transcript_text.replace(" ", "")
|
||||||
|
|
||||||
|
if any(keyword in normalized for keyword in ("保存故事", "存起来", "保存吧", "保存到故事库")):
|
||||||
|
return "save_story", 0.95
|
||||||
|
if any(keyword in normalized for keyword in ("先到这里", "讲完了", "结束吧", "停在这里")):
|
||||||
|
return "end_story", 0.88
|
||||||
|
if current_turn_index == 0:
|
||||||
|
return "start_story", 0.82
|
||||||
|
if any(
|
||||||
|
keyword in normalized
|
||||||
|
for keyword in (
|
||||||
|
"不要",
|
||||||
|
"改成",
|
||||||
|
"换成",
|
||||||
|
"我想让",
|
||||||
|
"让它",
|
||||||
|
"改一下",
|
||||||
|
"改一改",
|
||||||
|
"其实",
|
||||||
|
)
|
||||||
|
):
|
||||||
|
return "correct_story", 0.76
|
||||||
|
return "continue_story", 0.68
|
||||||
|
|
||||||
|
|
||||||
|
def _recent_story_text(session: VoiceSession) -> str:
|
||||||
|
story_state = session.story_state or {}
|
||||||
|
segments = list(story_state.get("narrative_segments") or [])
|
||||||
|
if not segments:
|
||||||
|
return ""
|
||||||
|
return "\n\n".join(segments[-2:])
|
||||||
|
|
||||||
|
|
||||||
|
def _build_generation_prompt(
|
||||||
|
*,
|
||||||
|
session: VoiceSession,
|
||||||
|
transcript_text: str,
|
||||||
|
intent: str,
|
||||||
|
) -> str:
|
||||||
|
recent_story = _recent_story_text(session)
|
||||||
|
|
||||||
|
if intent == "start_story":
|
||||||
|
return (
|
||||||
|
"你是 DreamWeaver 的儿童故事共创助手。"
|
||||||
|
"请为 3-8 岁儿童写一个温暖、安全、适合继续接龙的故事开头。"
|
||||||
|
f"孩子刚刚说:{transcript_text}。"
|
||||||
|
"请只输出一小段自然的中文故事,不要分点,不要解释,不要写“故事开始”。"
|
||||||
|
)
|
||||||
|
|
||||||
|
if intent == "correct_story":
|
||||||
|
return (
|
||||||
|
"你是 DreamWeaver 的儿童故事共创助手。"
|
||||||
|
f"当前故事最近两段如下:{recent_story or '(暂时还没有已讲述内容)'}。"
|
||||||
|
f"孩子希望修正故事走向:{transcript_text}。"
|
||||||
|
"请顺着已有内容自然接住这个修改,继续写一小段新故事。"
|
||||||
|
"不要从头重讲,不要解释规则。"
|
||||||
|
)
|
||||||
|
|
||||||
|
return (
|
||||||
|
"你是 DreamWeaver 的儿童故事共创助手。"
|
||||||
|
f"当前故事最近两段如下:{recent_story or '(暂时还没有已讲述内容)'}。"
|
||||||
|
f"孩子接着说:{transcript_text}。"
|
||||||
|
"请继续写一小段新的儿童故事内容,让故事自然往下发展。"
|
||||||
|
"不要分点,不要做旁白说明。"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _generate_assistant_turn(
|
||||||
|
db: AsyncSession,
|
||||||
|
*,
|
||||||
|
session: VoiceSession,
|
||||||
|
transcript_text: str,
|
||||||
|
intent: str,
|
||||||
|
) -> StoryOutput:
|
||||||
|
memory_context = await build_enhanced_memory_context(
|
||||||
|
session.child_profile_id,
|
||||||
|
session.universe_id,
|
||||||
|
db,
|
||||||
|
)
|
||||||
|
prompt = _build_generation_prompt(
|
||||||
|
session=session,
|
||||||
|
transcript_text=transcript_text,
|
||||||
|
intent=intent,
|
||||||
|
)
|
||||||
|
return await generate_story_content(
|
||||||
|
input_type="full_story",
|
||||||
|
data=prompt,
|
||||||
|
memory_context=memory_context,
|
||||||
|
db=db,
|
||||||
|
user_id=session.user_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_story_state(
|
||||||
|
session: VoiceSession,
|
||||||
|
*,
|
||||||
|
transcript_text: str,
|
||||||
|
intent: str,
|
||||||
|
assistant_result: StoryOutput | None,
|
||||||
|
) -> tuple[dict[str, Any], dict[str, Any]]:
|
||||||
|
current_state = _default_story_state() | (session.story_state or {})
|
||||||
|
narrative_segments = list(current_state.get("narrative_segments") or [])
|
||||||
|
|
||||||
|
if intent == "start_story" and not current_state.get("premise"):
|
||||||
|
current_state["premise"] = transcript_text
|
||||||
|
|
||||||
|
if assistant_result and assistant_result.story_text:
|
||||||
|
narrative_segments.append(assistant_result.story_text.strip())
|
||||||
|
|
||||||
|
current_state["narrative_segments"] = narrative_segments
|
||||||
|
current_state["latest_direction"] = transcript_text
|
||||||
|
current_state["last_intent"] = intent
|
||||||
|
if assistant_result and assistant_result.cover_prompt_suggestion:
|
||||||
|
current_state["cover_prompt"] = assistant_result.cover_prompt_suggestion
|
||||||
|
|
||||||
|
patch = {
|
||||||
|
"intent": intent,
|
||||||
|
"transcript_text": transcript_text,
|
||||||
|
"segment_added": bool(assistant_result and assistant_result.story_text),
|
||||||
|
"working_title": assistant_result.title if assistant_result else session.working_title,
|
||||||
|
"cover_prompt": current_state.get("cover_prompt"),
|
||||||
|
"narrative_segments_count": len(narrative_segments),
|
||||||
|
}
|
||||||
|
return current_state, patch
|
||||||
|
|
||||||
|
|
||||||
|
async def create_voice_session_service(
|
||||||
|
request: VoiceSessionCreateRequest,
|
||||||
|
user_id: str,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> VoiceSessionSummaryResponse:
|
||||||
|
profile_id, universe_id = await validate_profile_and_universe(
|
||||||
|
request.child_profile_id,
|
||||||
|
request.universe_id,
|
||||||
|
user_id,
|
||||||
|
db,
|
||||||
|
)
|
||||||
|
|
||||||
|
session = VoiceSession(
|
||||||
|
user_id=user_id,
|
||||||
|
child_profile_id=profile_id,
|
||||||
|
universe_id=universe_id,
|
||||||
|
target_mode=request.target_mode,
|
||||||
|
status="draft",
|
||||||
|
story_state=_default_story_state(),
|
||||||
|
)
|
||||||
|
db.add(session)
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(session)
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=None,
|
||||||
|
event_type="session_created",
|
||||||
|
status="succeeded",
|
||||||
|
message="Voice co-creation session created.",
|
||||||
|
metadata={
|
||||||
|
"child_profile_id": session.child_profile_id,
|
||||||
|
"universe_id": session.universe_id,
|
||||||
|
"target_mode": session.target_mode,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
await db.refresh(session)
|
||||||
|
return _session_to_summary(session)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_voice_session_detail_service(
|
||||||
|
session_id: str,
|
||||||
|
user_id: str,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> VoiceSessionDetailResponse:
|
||||||
|
session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
|
||||||
|
turns = (
|
||||||
|
await db.execute(
|
||||||
|
select(VoiceTurn)
|
||||||
|
.where(VoiceTurn.session_id == session.id)
|
||||||
|
.order_by(desc(VoiceTurn.turn_index))
|
||||||
|
.limit(10)
|
||||||
|
)
|
||||||
|
).scalars().all()
|
||||||
|
turns = list(reversed(turns))
|
||||||
|
|
||||||
|
events = (
|
||||||
|
await db.execute(
|
||||||
|
select(VoiceSessionEvent)
|
||||||
|
.where(VoiceSessionEvent.session_id == session.id)
|
||||||
|
.order_by(desc(VoiceSessionEvent.id))
|
||||||
|
.limit(50)
|
||||||
|
)
|
||||||
|
).scalars().all()
|
||||||
|
events = list(reversed(events))
|
||||||
|
|
||||||
|
summary = _session_to_summary(session)
|
||||||
|
return VoiceSessionDetailResponse(
|
||||||
|
**summary.model_dump(),
|
||||||
|
recent_turns=[_turn_to_summary(turn) for turn in turns],
|
||||||
|
events=[
|
||||||
|
{
|
||||||
|
"id": event.id,
|
||||||
|
"session_id": event.session_id,
|
||||||
|
"turn_id": event.turn_id,
|
||||||
|
"event_type": event.event_type,
|
||||||
|
"status": event.status,
|
||||||
|
"message": event.message,
|
||||||
|
"event_metadata": event.event_metadata or {},
|
||||||
|
"created_at": event.created_at,
|
||||||
|
}
|
||||||
|
for event in events
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def create_voice_turn_from_text_service(
|
||||||
|
session_id: str,
|
||||||
|
request: VoiceTurnCreateFallbackRequest,
|
||||||
|
user_id: str,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> VoiceTurnAcceptedResponse:
|
||||||
|
session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
|
||||||
|
if session.status not in CONTINUABLE_SESSION_STATUSES:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=409,
|
||||||
|
detail="Voice session is not ready for another turn.",
|
||||||
|
)
|
||||||
|
|
||||||
|
transcript_text = request.transcript_text.strip()
|
||||||
|
next_turn_index = session.current_turn_index + 1
|
||||||
|
detected_intent, intent_confidence = _detect_intent(
|
||||||
|
transcript_text,
|
||||||
|
current_turn_index=session.current_turn_index,
|
||||||
|
)
|
||||||
|
|
||||||
|
turn = VoiceTurn(
|
||||||
|
session_id=session.id,
|
||||||
|
turn_index=next_turn_index,
|
||||||
|
status="transcribing",
|
||||||
|
user_audio_duration_ms=request.duration_ms,
|
||||||
|
user_transcript=transcript_text,
|
||||||
|
transcript_confidence=1.0,
|
||||||
|
detected_intent=detected_intent,
|
||||||
|
intent_confidence=intent_confidence,
|
||||||
|
)
|
||||||
|
session.status = "processing_turn"
|
||||||
|
session.current_turn_index = next_turn_index
|
||||||
|
session.latest_user_transcript = transcript_text
|
||||||
|
session.last_error = None
|
||||||
|
db.add(turn)
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(session)
|
||||||
|
await db.refresh(turn)
|
||||||
|
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=turn.id,
|
||||||
|
event_type="turn_received",
|
||||||
|
status="received",
|
||||||
|
message="Voice turn fallback text received.",
|
||||||
|
metadata={"turn_index": turn.turn_index},
|
||||||
|
)
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=turn.id,
|
||||||
|
event_type="turn_transcribed",
|
||||||
|
status="succeeded",
|
||||||
|
message="Fallback transcript accepted.",
|
||||||
|
metadata={"transcript_confidence": turn.transcript_confidence},
|
||||||
|
)
|
||||||
|
|
||||||
|
assistant_text: str | None = None
|
||||||
|
assistant_result: StoryOutput | None = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=turn.id,
|
||||||
|
event_type="intent_resolved",
|
||||||
|
status="succeeded",
|
||||||
|
message="Turn intent resolved.",
|
||||||
|
metadata={
|
||||||
|
"detected_intent": detected_intent,
|
||||||
|
"intent_confidence": intent_confidence,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if detected_intent == "save_story":
|
||||||
|
assistant_text = "好的,这个故事已经准备好保存到故事库了。"
|
||||||
|
elif detected_intent == "end_story":
|
||||||
|
assistant_text = "好的,我们先把故事停在这里。想保存的话,现在就可以保存到故事库。"
|
||||||
|
else:
|
||||||
|
assistant_result = await _generate_assistant_turn(
|
||||||
|
db,
|
||||||
|
session=session,
|
||||||
|
transcript_text=transcript_text,
|
||||||
|
intent=detected_intent,
|
||||||
|
)
|
||||||
|
assistant_text = assistant_result.story_text.strip()
|
||||||
|
|
||||||
|
merged_state, story_patch = _merge_story_state(
|
||||||
|
session,
|
||||||
|
transcript_text=transcript_text,
|
||||||
|
intent=detected_intent,
|
||||||
|
assistant_result=assistant_result,
|
||||||
|
)
|
||||||
|
turn.story_patch = story_patch
|
||||||
|
turn.assistant_text = assistant_text
|
||||||
|
turn.status = "narrative_ready"
|
||||||
|
session.story_state = merged_state
|
||||||
|
session.latest_assistant_text = assistant_text
|
||||||
|
session.status = "waiting_user"
|
||||||
|
if assistant_result and assistant_result.title and not session.working_title:
|
||||||
|
session.working_title = assistant_result.title
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(session)
|
||||||
|
await db.refresh(turn)
|
||||||
|
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=turn.id,
|
||||||
|
event_type="story_patch_applied",
|
||||||
|
status="succeeded",
|
||||||
|
message="Story state updated after one turn.",
|
||||||
|
metadata=story_patch,
|
||||||
|
)
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=turn.id,
|
||||||
|
event_type="assistant_text_ready",
|
||||||
|
status="succeeded",
|
||||||
|
message="Assistant text response generated.",
|
||||||
|
metadata={
|
||||||
|
"assistant_text_length": len(assistant_text or ""),
|
||||||
|
"working_title": session.working_title,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
turn.status = "failed"
|
||||||
|
turn.error_message = str(exc)
|
||||||
|
session.status = "waiting_user"
|
||||||
|
session.last_error = str(exc)
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(session)
|
||||||
|
await db.refresh(turn)
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=turn.id,
|
||||||
|
event_type="session_failed",
|
||||||
|
status="failed",
|
||||||
|
message="Assistant narrative generation failed for one voice turn.",
|
||||||
|
metadata={"error": str(exc), "turn_index": turn.turn_index},
|
||||||
|
)
|
||||||
|
logger.warning(
|
||||||
|
"voice_turn_generation_failed",
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=turn.id,
|
||||||
|
error=str(exc),
|
||||||
|
)
|
||||||
|
return VoiceTurnAcceptedResponse(
|
||||||
|
turn_id=turn.id,
|
||||||
|
session_id=session.id,
|
||||||
|
status=turn.status,
|
||||||
|
)
|
||||||
|
|
||||||
|
if assistant_text:
|
||||||
|
try:
|
||||||
|
audio_bytes = await text_to_speech(
|
||||||
|
assistant_text,
|
||||||
|
db=db,
|
||||||
|
user_id=user_id,
|
||||||
|
)
|
||||||
|
saved_path = write_session_audio(
|
||||||
|
build_turn_assistant_audio_path(session.id, turn.turn_index),
|
||||||
|
audio_bytes,
|
||||||
|
)
|
||||||
|
turn.assistant_audio_path = saved_path
|
||||||
|
turn.assistant_audio_duration_ms = None
|
||||||
|
turn.status = "audio_ready"
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(turn)
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=turn.id,
|
||||||
|
event_type="assistant_audio_ready",
|
||||||
|
status="succeeded",
|
||||||
|
message="Assistant audio response generated.",
|
||||||
|
metadata={"audio_path": saved_path},
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
turn.status = "narrative_ready"
|
||||||
|
turn.error_message = None
|
||||||
|
session.last_error = None
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(turn)
|
||||||
|
await db.refresh(session)
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=turn.id,
|
||||||
|
event_type="assistant_audio_failed",
|
||||||
|
status="failed",
|
||||||
|
message="Assistant audio generation failed, text response kept.",
|
||||||
|
metadata={"error": str(exc)},
|
||||||
|
)
|
||||||
|
logger.warning(
|
||||||
|
"voice_turn_audio_failed",
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=turn.id,
|
||||||
|
error=str(exc),
|
||||||
|
)
|
||||||
|
|
||||||
|
return VoiceTurnAcceptedResponse(
|
||||||
|
turn_id=turn.id,
|
||||||
|
session_id=session.id,
|
||||||
|
status=turn.status,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_voice_turn_service(
|
||||||
|
session_id: str,
|
||||||
|
turn_id: str,
|
||||||
|
user_id: str,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> VoiceTurnSummaryResponse:
|
||||||
|
turn = await _get_owned_turn(
|
||||||
|
db,
|
||||||
|
session_id=session_id,
|
||||||
|
turn_id=turn_id,
|
||||||
|
user_id=user_id,
|
||||||
|
)
|
||||||
|
return _turn_to_summary(turn)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_voice_turn_audio_service(
|
||||||
|
session_id: str,
|
||||||
|
turn_id: str,
|
||||||
|
user_id: str,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> bytes:
|
||||||
|
turn = await _get_owned_turn(
|
||||||
|
db,
|
||||||
|
session_id=session_id,
|
||||||
|
turn_id=turn_id,
|
||||||
|
user_id=user_id,
|
||||||
|
)
|
||||||
|
if not session_audio_exists(turn.assistant_audio_path):
|
||||||
|
raise HTTPException(status_code=404, detail="Voice turn audio not found")
|
||||||
|
return read_session_audio(turn.assistant_audio_path)
|
||||||
|
|
||||||
|
|
||||||
|
async def finalize_voice_session_service(
|
||||||
|
session_id: str,
|
||||||
|
request: VoiceSessionFinalizeRequest,
|
||||||
|
user_id: str,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> VoiceSessionFinalizeResponse:
|
||||||
|
if not request.save_story:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail="Voice session finalize requires save_story=true in Phase A.",
|
||||||
|
)
|
||||||
|
|
||||||
|
session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
|
||||||
|
if session.status in FINAL_SESSION_STATUSES:
|
||||||
|
raise HTTPException(status_code=409, detail="Voice session is already closed.")
|
||||||
|
if not _session_can_finalize(session):
|
||||||
|
raise HTTPException(status_code=409, detail="Voice session is not ready to finalize.")
|
||||||
|
|
||||||
|
session.status = "finalizing_story"
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(session)
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=None,
|
||||||
|
event_type="session_finalizing",
|
||||||
|
status="running",
|
||||||
|
message="Voice session is being finalized into a story.",
|
||||||
|
metadata={"generate_cover": request.generate_cover},
|
||||||
|
)
|
||||||
|
|
||||||
|
story_state = session.story_state or {}
|
||||||
|
narrative_segments = list(story_state.get("narrative_segments") or [])
|
||||||
|
final_story_text = "\n\n".join(
|
||||||
|
segment.strip() for segment in narrative_segments if segment.strip()
|
||||||
|
)
|
||||||
|
if not final_story_text:
|
||||||
|
raise HTTPException(status_code=409, detail="Voice session has no narrative to save.")
|
||||||
|
|
||||||
|
story_result = StoryOutput(
|
||||||
|
mode="generated",
|
||||||
|
title=session.working_title or "一起编织的睡前故事",
|
||||||
|
story_text=final_story_text,
|
||||||
|
cover_prompt_suggestion=(
|
||||||
|
(story_state.get("cover_prompt") or "") if request.generate_cover else ""
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
story = await create_story_from_result(
|
||||||
|
result=story_result,
|
||||||
|
user_id=user_id,
|
||||||
|
profile_id=session.child_profile_id,
|
||||||
|
universe_id=session.universe_id,
|
||||||
|
db=db,
|
||||||
|
)
|
||||||
|
|
||||||
|
session.final_story_id = story.id
|
||||||
|
session.status = "completed"
|
||||||
|
session.last_error = None
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(session)
|
||||||
|
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=None,
|
||||||
|
event_type="session_saved_as_story",
|
||||||
|
status="succeeded",
|
||||||
|
message="Voice session finalized into a story.",
|
||||||
|
metadata={"story_id": story.id},
|
||||||
|
)
|
||||||
|
|
||||||
|
return VoiceSessionFinalizeResponse(
|
||||||
|
session_id=session.id,
|
||||||
|
status=session.status,
|
||||||
|
story_id=story.id,
|
||||||
|
generation_job_id=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def abandon_voice_session_service(
|
||||||
|
session_id: str,
|
||||||
|
request: VoiceSessionAbandonRequest,
|
||||||
|
user_id: str,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> VoiceSessionSummaryResponse:
|
||||||
|
session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
|
||||||
|
if session.status in FINAL_SESSION_STATUSES:
|
||||||
|
raise HTTPException(status_code=409, detail="Voice session is already closed.")
|
||||||
|
|
||||||
|
session.status = "abandoned"
|
||||||
|
session.last_error = request.reason
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(session)
|
||||||
|
|
||||||
|
await _record_session_event(
|
||||||
|
db,
|
||||||
|
session_id=session.id,
|
||||||
|
turn_id=None,
|
||||||
|
event_type="session_abandoned",
|
||||||
|
status="succeeded",
|
||||||
|
message="Voice session abandoned by the user.",
|
||||||
|
metadata={"reason": request.reason},
|
||||||
|
)
|
||||||
|
await db.refresh(session)
|
||||||
|
return _session_to_summary(session)
|
||||||
48
backend/app/services/voice_session_storage.py
Normal file
48
backend/app/services/voice_session_storage.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
"""Voice co-creation session storage helpers."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.core.config import settings
|
||||||
|
|
||||||
|
|
||||||
|
def session_storage_dir(session_id: str) -> Path:
|
||||||
|
"""Return the storage directory for one voice session."""
|
||||||
|
|
||||||
|
return Path(settings.voice_session_storage_dir) / session_id
|
||||||
|
|
||||||
|
|
||||||
|
def build_turn_user_audio_path(session_id: str, turn_index: int, suffix: str) -> Path:
|
||||||
|
"""Build the persisted path for one user-uploaded turn audio file."""
|
||||||
|
|
||||||
|
normalized_suffix = suffix.lstrip(".") or "webm"
|
||||||
|
return session_storage_dir(session_id) / f"turn-{turn_index:03d}-user.{normalized_suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
def build_turn_assistant_audio_path(session_id: str, turn_index: int) -> Path:
|
||||||
|
"""Build the persisted path for one generated assistant turn audio file."""
|
||||||
|
|
||||||
|
return session_storage_dir(session_id) / f"turn-{turn_index:03d}-assistant.mp3"
|
||||||
|
|
||||||
|
|
||||||
|
def write_session_audio(path: Path, audio_data: bytes) -> str:
|
||||||
|
"""Persist session audio bytes atomically and return the saved path."""
|
||||||
|
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
temp_path = path.with_suffix(f"{path.suffix}.tmp")
|
||||||
|
temp_path.write_bytes(audio_data)
|
||||||
|
temp_path.replace(path)
|
||||||
|
return str(path)
|
||||||
|
|
||||||
|
|
||||||
|
def read_session_audio(audio_path: str) -> bytes:
|
||||||
|
"""Read persisted session audio bytes."""
|
||||||
|
|
||||||
|
return Path(audio_path).read_bytes()
|
||||||
|
|
||||||
|
|
||||||
|
def session_audio_exists(audio_path: str | None) -> bool:
|
||||||
|
"""Whether one stored session audio file currently exists."""
|
||||||
|
|
||||||
|
return bool(audio_path) and Path(audio_path).is_file()
|
||||||
@@ -187,6 +187,18 @@ def isolated_story_audio_cache(tmp_path, monkeypatch):
|
|||||||
yield
|
yield
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def isolated_voice_session_storage(tmp_path, monkeypatch):
|
||||||
|
"""Use an isolated directory for voice session assets."""
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings,
|
||||||
|
"voice_session_storage_dir",
|
||||||
|
str(tmp_path / "voice_sessions"),
|
||||||
|
)
|
||||||
|
yield
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def mock_text_provider():
|
def mock_text_provider():
|
||||||
"""Mock text generation."""
|
"""Mock text generation."""
|
||||||
|
|||||||
201
backend/tests/test_voice_sessions.py
Normal file
201
backend/tests/test_voice_sessions.py
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
from unittest.mock import AsyncMock, patch
|
||||||
|
|
||||||
|
from httpx import ASGITransport, AsyncClient
|
||||||
|
|
||||||
|
from app.db.database import get_db
|
||||||
|
from app.main import app
|
||||||
|
from app.services.adapters.text.models import StoryOutput
|
||||||
|
|
||||||
|
|
||||||
|
async def test_voice_session_create_and_fallback_turn_returns_audio(
|
||||||
|
db_session,
|
||||||
|
auth_token,
|
||||||
|
):
|
||||||
|
async def override_get_db():
|
||||||
|
yield db_session
|
||||||
|
|
||||||
|
app.dependency_overrides[get_db] = override_get_db
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch(
|
||||||
|
"app.services.voice_session_service.generate_story_content",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_generate,
|
||||||
|
patch(
|
||||||
|
"app.services.voice_session_service.text_to_speech",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_tts,
|
||||||
|
):
|
||||||
|
mock_generate.return_value = StoryOutput(
|
||||||
|
mode="generated",
|
||||||
|
title="小猫去太空",
|
||||||
|
story_text="小猫跳上纸飞机,朝着月亮轻轻挥手。",
|
||||||
|
cover_prompt_suggestion="温暖儿童绘本封面,小猫与月亮",
|
||||||
|
)
|
||||||
|
mock_tts.return_value = b"fake-turn-audio"
|
||||||
|
|
||||||
|
transport = ASGITransport(app=app)
|
||||||
|
try:
|
||||||
|
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||||
|
client.cookies.set("access_token", auth_token)
|
||||||
|
|
||||||
|
response = await client.post("/api/voice-sessions", json={})
|
||||||
|
assert response.status_code == 201
|
||||||
|
session_id = response.json()["id"]
|
||||||
|
|
||||||
|
response = await client.post(
|
||||||
|
f"/api/voice-sessions/{session_id}/turns/fallback",
|
||||||
|
json={"transcript_text": "我想听一个小猫去太空的故事"},
|
||||||
|
)
|
||||||
|
assert response.status_code == 202
|
||||||
|
turn_id = response.json()["turn_id"]
|
||||||
|
|
||||||
|
response = await client.get(
|
||||||
|
f"/api/voice-sessions/{session_id}/turns/{turn_id}"
|
||||||
|
)
|
||||||
|
assert response.status_code == 200
|
||||||
|
turn_data = response.json()
|
||||||
|
assert turn_data["status"] == "audio_ready"
|
||||||
|
assert turn_data["detected_intent"] == "start_story"
|
||||||
|
assert turn_data["assistant_audio_ready"] is True
|
||||||
|
assert turn_data["assistant_audio_url"].endswith("/audio")
|
||||||
|
|
||||||
|
response = await client.get(turn_data["assistant_audio_url"])
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.content == b"fake-turn-audio"
|
||||||
|
assert response.headers["content-type"] == "audio/mpeg"
|
||||||
|
|
||||||
|
response = await client.get(f"/api/voice-sessions/{session_id}")
|
||||||
|
assert response.status_code == 200
|
||||||
|
session_data = response.json()
|
||||||
|
assert session_data["status"] == "waiting_user"
|
||||||
|
assert session_data["working_title"] == "小猫去太空"
|
||||||
|
assert session_data["can_continue"] is True
|
||||||
|
assert session_data["can_finalize"] is True
|
||||||
|
assert len(session_data["recent_turns"]) == 1
|
||||||
|
finally:
|
||||||
|
app.dependency_overrides.clear()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_voice_session_correct_turn_and_finalize_to_story(
|
||||||
|
db_session,
|
||||||
|
auth_token,
|
||||||
|
):
|
||||||
|
async def override_get_db():
|
||||||
|
yield db_session
|
||||||
|
|
||||||
|
app.dependency_overrides[get_db] = override_get_db
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch(
|
||||||
|
"app.services.voice_session_service.generate_story_content",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_generate,
|
||||||
|
patch(
|
||||||
|
"app.services.voice_session_service.text_to_speech",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_tts,
|
||||||
|
):
|
||||||
|
mock_generate.side_effect = [
|
||||||
|
StoryOutput(
|
||||||
|
mode="generated",
|
||||||
|
title="小猫去太空",
|
||||||
|
story_text="第一段故事:小猫坐着纸飞机飞向月亮。",
|
||||||
|
cover_prompt_suggestion="温暖儿童绘本封面,小猫飞向月亮",
|
||||||
|
),
|
||||||
|
StoryOutput(
|
||||||
|
mode="generated",
|
||||||
|
title="小猫去太空",
|
||||||
|
story_text="第二段故事:它在月亮上遇见了会发光的新朋友。",
|
||||||
|
cover_prompt_suggestion="温暖儿童绘本封面,小猫与月亮朋友",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
mock_tts.side_effect = [b"turn-1-audio", b"turn-2-audio"]
|
||||||
|
|
||||||
|
transport = ASGITransport(app=app)
|
||||||
|
try:
|
||||||
|
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||||
|
client.cookies.set("access_token", auth_token)
|
||||||
|
|
||||||
|
response = await client.post("/api/voice-sessions", json={})
|
||||||
|
assert response.status_code == 201
|
||||||
|
session_id = response.json()["id"]
|
||||||
|
|
||||||
|
response = await client.post(
|
||||||
|
f"/api/voice-sessions/{session_id}/turns/fallback",
|
||||||
|
json={"transcript_text": "我想听一个小猫去太空的故事"},
|
||||||
|
)
|
||||||
|
assert response.status_code == 202
|
||||||
|
|
||||||
|
response = await client.post(
|
||||||
|
f"/api/voice-sessions/{session_id}/turns/fallback",
|
||||||
|
json={"transcript_text": "不要让它哭了,我想让它找到一个朋友"},
|
||||||
|
)
|
||||||
|
assert response.status_code == 202
|
||||||
|
turn_id = response.json()["turn_id"]
|
||||||
|
|
||||||
|
response = await client.get(
|
||||||
|
f"/api/voice-sessions/{session_id}/turns/{turn_id}"
|
||||||
|
)
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.json()["detected_intent"] == "correct_story"
|
||||||
|
|
||||||
|
response = await client.post(
|
||||||
|
f"/api/voice-sessions/{session_id}/finalize",
|
||||||
|
json={"save_story": True, "generate_cover": True},
|
||||||
|
)
|
||||||
|
assert response.status_code == 200
|
||||||
|
finalize_data = response.json()
|
||||||
|
story_id = finalize_data["story_id"]
|
||||||
|
assert finalize_data["status"] == "completed"
|
||||||
|
|
||||||
|
response = await client.get(f"/api/stories/{story_id}")
|
||||||
|
assert response.status_code == 200
|
||||||
|
story_data = response.json()
|
||||||
|
assert story_data["title"] == "小猫去太空"
|
||||||
|
assert "第一段故事" in story_data["story_text"]
|
||||||
|
assert "第二段故事" in story_data["story_text"]
|
||||||
|
assert story_data["generation_status"] == "partial_ready"
|
||||||
|
|
||||||
|
response = await client.get(f"/api/voice-sessions/{session_id}")
|
||||||
|
assert response.status_code == 200
|
||||||
|
session_data = response.json()
|
||||||
|
assert session_data["status"] == "completed"
|
||||||
|
assert session_data["final_story_id"] == story_id
|
||||||
|
assert session_data["can_continue"] is False
|
||||||
|
finally:
|
||||||
|
app.dependency_overrides.clear()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_voice_session_abandon_blocks_future_turns(
|
||||||
|
db_session,
|
||||||
|
auth_token,
|
||||||
|
):
|
||||||
|
async def override_get_db():
|
||||||
|
yield db_session
|
||||||
|
|
||||||
|
app.dependency_overrides[get_db] = override_get_db
|
||||||
|
|
||||||
|
transport = ASGITransport(app=app)
|
||||||
|
try:
|
||||||
|
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||||
|
client.cookies.set("access_token", auth_token)
|
||||||
|
|
||||||
|
response = await client.post("/api/voice-sessions", json={})
|
||||||
|
assert response.status_code == 201
|
||||||
|
session_id = response.json()["id"]
|
||||||
|
|
||||||
|
response = await client.post(
|
||||||
|
f"/api/voice-sessions/{session_id}/abandon",
|
||||||
|
json={"reason": "孩子先去吃饭了"},
|
||||||
|
)
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.json()["status"] == "abandoned"
|
||||||
|
|
||||||
|
response = await client.post(
|
||||||
|
f"/api/voice-sessions/{session_id}/turns/fallback",
|
||||||
|
json={"transcript_text": "我们继续讲吧"},
|
||||||
|
)
|
||||||
|
assert response.status_code == 409
|
||||||
|
finally:
|
||||||
|
app.dependency_overrides.clear()
|
||||||
Reference in New Issue
Block a user