feat: add voice studio prototype flow
This commit is contained in:
@@ -1,6 +1,15 @@
|
||||
"""Voice co-creation session APIs."""
|
||||
|
||||
from fastapi import APIRouter, Depends, Response, status
|
||||
from fastapi import (
|
||||
APIRouter,
|
||||
Depends,
|
||||
File,
|
||||
Form,
|
||||
Query,
|
||||
Response,
|
||||
UploadFile,
|
||||
status,
|
||||
)
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.deps import require_user
|
||||
@@ -17,15 +26,19 @@ from app.schemas.voice_session_schemas import (
|
||||
VoiceTurnAcceptedResponse,
|
||||
VoiceTurnCreateFallbackRequest,
|
||||
VoiceTurnSummaryResponse,
|
||||
VoiceTurnUploadAcceptedResponse,
|
||||
)
|
||||
from app.services.voice_session_service import (
|
||||
abandon_voice_session_service,
|
||||
create_voice_session_service,
|
||||
create_voice_turn_from_text_service,
|
||||
create_voice_turn_from_upload_service,
|
||||
finalize_voice_session_service,
|
||||
get_voice_session_detail_service,
|
||||
get_voice_turn_audio_service,
|
||||
get_voice_turn_service,
|
||||
get_voice_turn_user_audio_service,
|
||||
list_voice_sessions_service,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
@@ -53,6 +66,22 @@ async def create_voice_session(
|
||||
return await create_voice_session_service(request, user.id, db)
|
||||
|
||||
|
||||
@router.get("/voice-sessions", response_model=list[VoiceSessionSummaryResponse])
async def list_voice_sessions(
    limit: int = Query(default=8, ge=1, le=20),
    active_only: bool = Query(default=False),
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """List recent voice co-creation sessions for restore/resume behavior.

    Query parameters:
        limit: Maximum number of sessions to return (1-20, default 8).
        active_only: Forwarded unchanged to the service layer as its
            ``active_only`` filter flag.
    """
    # Both query options are keyword-only on the service side.
    listing_options = {"limit": limit, "active_only": active_only}
    summaries = await list_voice_sessions_service(user.id, db, **listing_options)
    return summaries
|
||||
|
||||
|
||||
@router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse)
|
||||
async def get_voice_session(
|
||||
session_id: str,
|
||||
@@ -83,6 +112,38 @@ async def create_voice_turn_from_text(
|
||||
return await create_voice_turn_from_text_service(session_id, request, user.id, db)
|
||||
|
||||
|
||||
@router.post(
    "/voice-sessions/{session_id}/turns",
    response_model=VoiceTurnUploadAcceptedResponse,
    status_code=status.HTTP_202_ACCEPTED,
)
async def create_voice_turn_from_upload(
    session_id: str,
    audio_file: UploadFile = File(...),
    duration_ms: int | None = Form(default=None),
    transcript_hint: str | None = Form(default=None),
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Create one turn from uploaded audio and configured ASR behavior.

    The request is rate limited per user before the upload body is read.
    The audio is then read fully into memory and handed to
    ``create_voice_turn_from_upload_service`` together with the optional
    ``duration_ms`` and ``transcript_hint`` form fields.

    Raises:
        HTTPException: 400 when the uploaded file contains no bytes.
    """
    # Local import: HTTPException is not among this module's visible imports.
    from fastapi import HTTPException

    await check_rate_limit(
        f"voice-turn:{user.id}",
        VOICE_SESSION_RATE_LIMIT_REQUESTS,
        VOICE_SESSION_RATE_LIMIT_WINDOW,
    )
    # NOTE(review): the whole upload is buffered in memory with no size cap
    # enforced here - confirm a limit exists upstream (proxy / middleware).
    audio_bytes = await audio_file.read()
    if not audio_bytes:
        # An empty recording has nothing to transcribe; reject it before it
        # is written to storage or sent to the transcription service.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Uploaded audio file is empty.",
        )
    return await create_voice_turn_from_upload_service(
        session_id=session_id,
        user_id=user.id,
        audio_bytes=audio_bytes,
        # Clients may omit the filename; fall back to a fixed default name.
        file_name=audio_file.filename or "voice-turn.webm",
        mime_type=audio_file.content_type,
        duration_ms=duration_ms,
        transcript_hint=transcript_hint,
        db=db,
    )
|
||||
|
||||
|
||||
@router.get(
|
||||
"/voice-sessions/{session_id}/turns/{turn_id}",
|
||||
response_model=VoiceTurnSummaryResponse,
|
||||
@@ -109,6 +170,23 @@ async def get_voice_turn_audio(
|
||||
return Response(content=audio_bytes, media_type="audio/mpeg")
|
||||
|
||||
|
||||
@router.get("/voice-sessions/{session_id}/turns/{turn_id}/user-audio")
async def get_voice_turn_user_audio(
    session_id: str,
    turn_id: str,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Get uploaded user audio for one voice turn.

    The service returns the raw bytes together with the media type to serve
    them under; both are passed straight into the HTTP response.
    """
    payload = await get_voice_turn_user_audio_service(session_id, turn_id, user.id, db)
    content, media_type = payload
    return Response(content=content, media_type=media_type)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/voice-sessions/{session_id}/finalize",
|
||||
response_model=VoiceSessionFinalizeResponse,
|
||||
|
||||
@@ -70,6 +70,18 @@ class Settings(BaseSettings):
|
||||
"storage/voice_sessions",
|
||||
description="Directory for persisted voice co-creation session assets",
|
||||
)
|
||||
voice_transcription_mode: str = Field(
|
||||
"demo",
|
||||
description="Voice transcription mode: demo, openai, or disabled",
|
||||
)
|
||||
voice_transcription_model: str = Field(
|
||||
"gpt-4o-mini-transcribe",
|
||||
description="Model used when voice transcription mode is OpenAI-backed",
|
||||
)
|
||||
voice_transcription_language: str = Field(
|
||||
"zh",
|
||||
description="Preferred language hint for voice transcription",
|
||||
)
|
||||
story_audio_cache_ttl_days: int = Field(
|
||||
30,
|
||||
description="TTL in days before cached story audio is pruned",
|
||||
|
||||
@@ -25,6 +25,15 @@ class VoiceTurnCreateFallbackRequest(BaseModel):
|
||||
duration_ms: int | None = Field(default=None, ge=1, le=MAX_VOICE_TURN_DURATION_MS)
|
||||
|
||||
|
||||
class VoiceTurnUploadAcceptedResponse(BaseModel):
    """Accepted response for one uploaded-audio voice turn."""

    # Identifier of the voice turn created for the upload.
    turn_id: str
    # Identifier of the voice session the turn belongs to.
    session_id: str
    # Turn status string reported after the upload was processed.
    status: str
    # Name of the transcription provider that produced the transcript;
    # optional, defaults to None.
    transcription_provider: str | None = None
|
||||
|
||||
|
||||
class VoiceSessionFinalizeRequest(BaseModel):
|
||||
"""Finalize one voice session into a persisted story."""
|
||||
|
||||
@@ -61,11 +70,14 @@ class VoiceTurnSummaryResponse(BaseModel):
|
||||
status: str
|
||||
user_transcript: str | None = None
|
||||
transcript_confidence: float | None = None
|
||||
transcription_provider: str | None = None
|
||||
detected_intent: str
|
||||
intent_confidence: float | None = None
|
||||
assistant_text: str | None = None
|
||||
assistant_audio_ready: bool = False
|
||||
assistant_audio_url: str | None = None
|
||||
user_audio_ready: bool = False
|
||||
user_audio_url: str | None = None
|
||||
error_message: str | None = None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
@@ -81,10 +93,14 @@ class VoiceSessionSummaryResponse(BaseModel):
|
||||
target_mode: str
|
||||
status: str
|
||||
current_turn_index: int
|
||||
total_turns: int = 0
|
||||
working_title: str | None = None
|
||||
story_state: dict[str, Any] = Field(default_factory=dict)
|
||||
latest_user_transcript: str | None = None
|
||||
latest_assistant_text: str | None = None
|
||||
latest_detected_intent: str | None = None
|
||||
latest_assistant_audio_ready: bool = False
|
||||
last_turn_status: str | None = None
|
||||
can_continue: bool = False
|
||||
can_finalize: bool = False
|
||||
last_error: str | None = None
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from fastapi import HTTPException
|
||||
@@ -20,24 +21,23 @@ from app.schemas.voice_session_schemas import (
|
||||
VoiceTurnAcceptedResponse,
|
||||
VoiceTurnCreateFallbackRequest,
|
||||
VoiceTurnSummaryResponse,
|
||||
VoiceTurnUploadAcceptedResponse,
|
||||
)
|
||||
from app.services.adapters.text.models import StoryOutput
|
||||
from app.services.memory_service import build_enhanced_memory_context
|
||||
from app.services.provider_router import generate_story_content, text_to_speech
|
||||
from app.services.story_service import (
|
||||
create_story_from_result,
|
||||
validate_profile_and_universe,
|
||||
)
|
||||
from app.services.story_service import create_story_from_result, validate_profile_and_universe
|
||||
from app.services.voice_session_storage import (
|
||||
build_turn_assistant_audio_path,
|
||||
read_session_audio,
|
||||
session_audio_exists,
|
||||
write_session_audio,
|
||||
write_uploaded_user_audio,
|
||||
)
|
||||
from app.services.voice_transcription_service import transcribe_voice_audio
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
ACTIVE_SESSION_STATUSES = {"draft", "active", "processing_turn", "waiting_user"}
|
||||
CONTINUABLE_SESSION_STATUSES = {"draft", "active", "waiting_user"}
|
||||
FINAL_SESSION_STATUSES = {"completed", "abandoned"}
|
||||
|
||||
@@ -62,13 +62,24 @@ def _session_can_finalize(session: VoiceSession) -> bool:
|
||||
return bool(segments) and session.status in {"active", "waiting_user"}
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    return datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
def _assistant_audio_url(session_id: str, turn_id: str, audio_path: str | None) -> str | None:
    """Return the API URL for a turn's assistant audio, or None.

    None is returned whenever ``session_audio_exists`` reports that
    ``audio_path`` is not available.
    """
    if session_audio_exists(audio_path):
        return f"/api/voice-sessions/{session_id}/turns/{turn_id}/audio"
    return None
|
||||
|
||||
|
||||
def _user_audio_url(session_id: str, turn_id: str, audio_path: str | None) -> str | None:
    """Return the API URL for a turn's uploaded user audio, or None.

    None is returned whenever ``session_audio_exists`` reports that
    ``audio_path`` is not available.
    """
    if session_audio_exists(audio_path):
        return f"/api/voice-sessions/{session_id}/turns/{turn_id}/user-audio"
    return None
|
||||
|
||||
|
||||
def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
|
||||
turn_patch = turn.story_patch or {}
|
||||
return VoiceTurnSummaryResponse(
|
||||
id=turn.id,
|
||||
session_id=turn.session_id,
|
||||
@@ -76,6 +87,7 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
|
||||
status=turn.status,
|
||||
user_transcript=turn.user_transcript,
|
||||
transcript_confidence=turn.transcript_confidence,
|
||||
transcription_provider=turn_patch.get("transcription_provider"),
|
||||
detected_intent=turn.detected_intent,
|
||||
intent_confidence=turn.intent_confidence,
|
||||
assistant_text=turn.assistant_text,
|
||||
@@ -85,13 +97,25 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
|
||||
turn.id,
|
||||
turn.assistant_audio_path,
|
||||
),
|
||||
user_audio_ready=session_audio_exists(turn.user_audio_path),
|
||||
user_audio_url=_user_audio_url(turn.session_id, turn.id, turn.user_audio_path),
|
||||
error_message=turn.error_message,
|
||||
created_at=turn.created_at,
|
||||
updated_at=turn.updated_at,
|
||||
)
|
||||
|
||||
|
||||
def _session_to_summary(session: VoiceSession) -> VoiceSessionSummaryResponse:
|
||||
def _session_to_summary(
|
||||
session: VoiceSession,
|
||||
*,
|
||||
latest_turn: VoiceTurn | None = None,
|
||||
total_turns: int | None = None,
|
||||
) -> VoiceSessionSummaryResponse:
|
||||
if latest_turn is None:
|
||||
total_turns = total_turns if total_turns is not None else session.current_turn_index
|
||||
else:
|
||||
total_turns = total_turns if total_turns is not None else latest_turn.turn_index
|
||||
|
||||
return VoiceSessionSummaryResponse(
|
||||
id=session.id,
|
||||
child_profile_id=session.child_profile_id,
|
||||
@@ -100,10 +124,16 @@ def _session_to_summary(session: VoiceSession) -> VoiceSessionSummaryResponse:
|
||||
target_mode=session.target_mode,
|
||||
status=session.status,
|
||||
current_turn_index=session.current_turn_index,
|
||||
total_turns=total_turns or 0,
|
||||
working_title=session.working_title,
|
||||
story_state=session.story_state or {},
|
||||
latest_user_transcript=session.latest_user_transcript,
|
||||
latest_assistant_text=session.latest_assistant_text,
|
||||
latest_detected_intent=latest_turn.detected_intent if latest_turn else None,
|
||||
latest_assistant_audio_ready=(
|
||||
session_audio_exists(latest_turn.assistant_audio_path) if latest_turn else False
|
||||
),
|
||||
last_turn_status=latest_turn.status if latest_turn else None,
|
||||
can_continue=_session_can_continue(session),
|
||||
can_finalize=_session_can_finalize(session),
|
||||
last_error=session.last_error,
|
||||
@@ -154,6 +184,20 @@ async def _get_owned_session(
|
||||
return session
|
||||
|
||||
|
||||
async def _get_latest_turn(
    db: AsyncSession,
    *,
    session_id: str,
) -> VoiceTurn | None:
    """Fetch the turn with the highest ``turn_index`` for one session.

    Returns None when the session has no turns.
    """
    newest_first = (
        select(VoiceTurn)
        .where(VoiceTurn.session_id == session_id)
        .order_by(desc(VoiceTurn.turn_index))
        .limit(1)
    )
    rows = await db.execute(newest_first)
    return rows.scalar_one_or_none()
|
||||
|
||||
|
||||
async def _get_owned_turn(
|
||||
db: AsyncSession,
|
||||
*,
|
||||
@@ -307,6 +351,283 @@ def _merge_story_state(
|
||||
return current_state, patch
|
||||
|
||||
|
||||
async def _create_pending_turn(
    db: AsyncSession,
    *,
    session: VoiceSession,
    transcript_text: str,
    transcript_confidence: float | None,
    transcription_provider: str | None,
    user_audio_path: str | None = None,
    user_audio_mime_type: str | None = None,
    user_audio_duration_ms: int | None = None,
) -> tuple[VoiceSession, VoiceTurn]:
    """Persist a new turn for ``session`` and mark the session as processing.

    Args:
        db: Async database session used for the commit and event records.
        session: Voice session that receives the new turn.
        transcript_text: Transcript stored on the turn and on the session.
        transcript_confidence: Confidence value stored with the transcript.
        transcription_provider: Provider name stored in the turn's
            ``story_patch`` and in the event metadata.
        user_audio_path: Path of the stored user audio, if any.
        user_audio_mime_type: MIME type of the stored user audio, if any.
        user_audio_duration_ms: Duration of the user audio, if known.

    Returns:
        The refreshed ``(session, turn)`` pair.

    Raises:
        HTTPException: 409 when the session status is not in
            ``CONTINUABLE_SESSION_STATUSES``.
    """
    if session.status not in CONTINUABLE_SESSION_STATUSES:
        raise HTTPException(
            status_code=409,
            detail="Voice session is not ready for another turn.",
        )

    next_turn_index = session.current_turn_index + 1
    # Intent detection sees the turn index from *before* this turn is added.
    detected_intent, intent_confidence = _detect_intent(
        transcript_text,
        current_turn_index=session.current_turn_index,
    )
    turn = VoiceTurn(
        session_id=session.id,
        turn_index=next_turn_index,
        status="transcribing",
        user_audio_path=user_audio_path,
        user_audio_mime_type=user_audio_mime_type,
        user_audio_duration_ms=user_audio_duration_ms,
        user_transcript=transcript_text,
        transcript_confidence=transcript_confidence,
        detected_intent=detected_intent,
        intent_confidence=intent_confidence,
        story_patch={"transcription_provider": transcription_provider},
    )
    # Move the session into its processing state and clear any stale error.
    session.status = "processing_turn"
    session.current_turn_index = next_turn_index
    session.latest_user_transcript = transcript_text
    session.last_error = None
    session.updated_at = _utcnow()
    db.add(turn)
    await db.commit()
    await db.refresh(session)
    await db.refresh(turn)

    # Event trail: received -> (audio uploaded, only with audio) -> transcribed.
    await _record_session_event(
        db,
        session_id=session.id,
        turn_id=turn.id,
        event_type="turn_received",
        status="received",
        message="Voice turn received.",
        metadata={
            "turn_index": turn.turn_index,
            "has_user_audio": bool(user_audio_path),
            "transcription_provider": transcription_provider,
        },
    )
    if user_audio_path:
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="turn_audio_uploaded",
            status="succeeded",
            message="User audio uploaded for one voice turn.",
            metadata={
                "mime_type": user_audio_mime_type,
                "audio_path": user_audio_path,
            },
        )
    await _record_session_event(
        db,
        session_id=session.id,
        turn_id=turn.id,
        event_type="turn_transcribed",
        status="succeeded",
        message="Voice turn transcript is available.",
        metadata={
            "transcript_confidence": transcript_confidence,
            "transcription_provider": transcription_provider,
        },
    )
    return session, turn
|
||||
|
||||
|
||||
async def _process_pending_turn(
    db: AsyncSession,
    *,
    session: VoiceSession,
    turn: VoiceTurn,
    transcript_text: str,
    user_id: str,
) -> str:
    """Produce the assistant reply (text, then audio) for a pending turn.

    Phase 1 resolves the assistant text: a fixed reply for the
    ``save_story`` / ``end_story`` intents, otherwise generated narrative.
    The story state is merged, the turn becomes ``narrative_ready`` and the
    session returns to ``waiting_user``. Any exception in this phase marks
    the turn ``failed``, stores the error on turn and session, and returns.

    Phase 2 synthesizes speech for the assistant text. On success the turn
    becomes ``audio_ready``; on failure the turn stays ``narrative_ready``
    and only an ``assistant_audio_failed`` event plus a warning are emitted.

    Args:
        db: Async database session used for commits and event records.
        session: Voice session owning the turn.
        turn: Pending turn to process; its stored intent is used.
        transcript_text: User transcript for this turn.
        user_id: Passed through to ``text_to_speech``.

    Returns:
        The turn's final status string.
    """
    assistant_text: str | None = None
    assistant_result: StoryOutput | None = None
    detected_intent = turn.detected_intent
    intent_confidence = turn.intent_confidence

    try:
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="intent_resolved",
            status="succeeded",
            message="Turn intent resolved.",
            metadata={
                "detected_intent": detected_intent,
                "intent_confidence": intent_confidence,
            },
        )

        if detected_intent == "save_story":
            # Fixed reply, roughly: "OK, this story is ready to be saved to
            # the story library."
            assistant_text = "好的,这个故事已经准备好保存到故事库了。"
        elif detected_intent == "end_story":
            # Fixed reply, roughly: "OK, let's stop the story here. If you
            # want to keep it, you can save it to the story library now."
            assistant_text = "好的,我们先把故事停在这里。想保存的话,现在就可以保存到故事库。"
        else:
            assistant_result = await _generate_assistant_turn(
                db,
                session=session,
                transcript_text=transcript_text,
                intent=detected_intent,
            )
            assistant_text = assistant_result.story_text.strip()

        merged_state, story_patch = _merge_story_state(
            session,
            transcript_text=transcript_text,
            intent=detected_intent,
            assistant_result=assistant_result,
        )
        # Carry over the provider recorded at turn creation, since the
        # freshly built patch replaces the turn's previous story_patch.
        story_patch["transcription_provider"] = (
            (turn.story_patch or {}).get("transcription_provider")
        )
        turn.story_patch = story_patch
        turn.assistant_text = assistant_text
        turn.status = "narrative_ready"
        session.story_state = merged_state
        session.latest_assistant_text = assistant_text
        session.status = "waiting_user"
        session.updated_at = _utcnow()
        # Only adopt a generated title while the session has none yet.
        if assistant_result and assistant_result.title and not session.working_title:
            session.working_title = assistant_result.title
        await db.commit()
        await db.refresh(session)
        await db.refresh(turn)

        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="story_patch_applied",
            status="succeeded",
            message="Story state updated after one turn.",
            metadata=story_patch,
        )
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="assistant_text_ready",
            status="succeeded",
            message="Assistant text response generated.",
            metadata={
                "assistant_text_length": len(assistant_text or ""),
                "working_title": session.working_title,
            },
        )
    except Exception as exc:
        # NOTE(review): this handler also catches failures of db.commit()
        # and of the event records above; if the database session itself is
        # in a failed state a rollback may be needed before committing
        # again - confirm.
        turn.status = "failed"
        turn.error_message = str(exc)
        # The session goes back to waiting_user rather than a failed state.
        session.status = "waiting_user"
        session.last_error = str(exc)
        session.updated_at = _utcnow()
        await db.commit()
        await db.refresh(session)
        await db.refresh(turn)
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="session_failed",
            status="failed",
            message="Assistant narrative generation failed for one voice turn.",
            metadata={"error": str(exc), "turn_index": turn.turn_index},
        )
        logger.warning(
            "voice_turn_generation_failed",
            session_id=session.id,
            turn_id=turn.id,
            error=str(exc),
        )
        return turn.status

    if assistant_text:
        try:
            audio_bytes = await text_to_speech(
                assistant_text,
                db=db,
                user_id=user_id,
            )
            saved_path = write_session_audio(
                build_turn_assistant_audio_path(session.id, turn.turn_index),
                audio_bytes,
            )
            turn.assistant_audio_path = saved_path
            turn.assistant_audio_duration_ms = None
            turn.status = "audio_ready"
            await db.commit()
            await db.refresh(turn)
            await _record_session_event(
                db,
                session_id=session.id,
                turn_id=turn.id,
                event_type="assistant_audio_ready",
                status="succeeded",
                message="Assistant audio response generated.",
                metadata={"audio_path": saved_path},
            )
        except Exception as exc:
            # Audio is best-effort: keep the text reply and leave the error
            # fields cleared; the failure is only visible through the event
            # and the warning below.
            turn.status = "narrative_ready"
            turn.error_message = None
            session.last_error = None
            session.updated_at = _utcnow()
            await db.commit()
            await db.refresh(turn)
            await db.refresh(session)
            await _record_session_event(
                db,
                session_id=session.id,
                turn_id=turn.id,
                event_type="assistant_audio_failed",
                status="failed",
                message="Assistant audio generation failed, text response kept.",
                metadata={"error": str(exc)},
            )
            logger.warning(
                "voice_turn_audio_failed",
                session_id=session.id,
                turn_id=turn.id,
                error=str(exc),
            )

    return turn.status
|
||||
|
||||
|
||||
async def list_voice_sessions_service(
    user_id: str,
    db: AsyncSession,
    *,
    limit: int = 8,
    active_only: bool = False,
) -> list[VoiceSessionSummaryResponse]:
    """Summarize the user's most recently updated voice sessions.

    Args:
        user_id: Owner whose sessions are listed.
        db: Async database session.
        limit: Maximum number of sessions returned.
        active_only: When true, keep only sessions whose status is in
            ``CONTINUABLE_SESSION_STATUSES``.

    Returns:
        One summary per session, newest ``updated_at`` first (ties broken by
        ``created_at``), each enriched with its latest turn.
    """
    stmt = select(VoiceSession).where(VoiceSession.user_id == user_id)
    if active_only:
        stmt = stmt.where(VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES))
    stmt = stmt.order_by(
        desc(VoiceSession.updated_at),
        desc(VoiceSession.created_at),
    ).limit(limit)

    rows = await db.execute(stmt)
    # One latest-turn lookup is issued per listed session.
    return [
        _session_to_summary(
            voice_session,
            latest_turn=await _get_latest_turn(db, session_id=voice_session.id),
            total_turns=voice_session.current_turn_index,
        )
        for voice_session in rows.scalars().all()
    ]
|
||||
|
||||
|
||||
async def create_voice_session_service(
|
||||
request: VoiceSessionCreateRequest,
|
||||
user_id: str,
|
||||
@@ -373,7 +694,12 @@ async def get_voice_session_detail_service(
|
||||
).scalars().all()
|
||||
events = list(reversed(events))
|
||||
|
||||
summary = _session_to_summary(session)
|
||||
latest_turn = turns[-1] if turns else None
|
||||
summary = _session_to_summary(
|
||||
session,
|
||||
latest_turn=latest_turn,
|
||||
total_turns=session.current_turn_index,
|
||||
)
|
||||
return VoiceSessionDetailResponse(
|
||||
**summary.model_dump(),
|
||||
recent_turns=[_turn_to_summary(turn) for turn in turns],
|
||||
@@ -399,208 +725,83 @@ async def create_voice_turn_from_text_service(
|
||||
user_id: str,
|
||||
db: AsyncSession,
|
||||
) -> VoiceTurnAcceptedResponse:
|
||||
session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
|
||||
transcript_text = request.transcript_text.strip()
|
||||
session, turn = await _create_pending_turn(
|
||||
db,
|
||||
session=session,
|
||||
transcript_text=transcript_text,
|
||||
transcript_confidence=1.0,
|
||||
transcription_provider="fallback",
|
||||
user_audio_duration_ms=request.duration_ms,
|
||||
)
|
||||
status = await _process_pending_turn(
|
||||
db,
|
||||
session=session,
|
||||
turn=turn,
|
||||
transcript_text=transcript_text,
|
||||
user_id=user_id,
|
||||
)
|
||||
return VoiceTurnAcceptedResponse(
|
||||
turn_id=turn.id,
|
||||
session_id=session.id,
|
||||
status=status,
|
||||
)
|
||||
|
||||
|
||||
async def create_voice_turn_from_upload_service(
|
||||
*,
|
||||
session_id: str,
|
||||
user_id: str,
|
||||
audio_bytes: bytes,
|
||||
file_name: str,
|
||||
mime_type: str | None,
|
||||
duration_ms: int | None,
|
||||
transcript_hint: str | None,
|
||||
db: AsyncSession,
|
||||
) -> VoiceTurnUploadAcceptedResponse:
|
||||
session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
|
||||
if session.status not in CONTINUABLE_SESSION_STATUSES:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail="Voice session is not ready for another turn.",
|
||||
)
|
||||
|
||||
transcript_text = request.transcript_text.strip()
|
||||
next_turn_index = session.current_turn_index + 1
|
||||
detected_intent, intent_confidence = _detect_intent(
|
||||
transcript_text,
|
||||
current_turn_index=session.current_turn_index,
|
||||
)
|
||||
|
||||
turn = VoiceTurn(
|
||||
user_audio_path = write_uploaded_user_audio(
|
||||
session_id=session.id,
|
||||
turn_index=next_turn_index,
|
||||
status="transcribing",
|
||||
user_audio_duration_ms=request.duration_ms,
|
||||
user_transcript=transcript_text,
|
||||
transcript_confidence=1.0,
|
||||
detected_intent=detected_intent,
|
||||
intent_confidence=intent_confidence,
|
||||
file_name=file_name,
|
||||
mime_type=mime_type,
|
||||
audio_data=audio_bytes,
|
||||
)
|
||||
session.status = "processing_turn"
|
||||
session.current_turn_index = next_turn_index
|
||||
session.latest_user_transcript = transcript_text
|
||||
session.last_error = None
|
||||
db.add(turn)
|
||||
await db.commit()
|
||||
await db.refresh(session)
|
||||
await db.refresh(turn)
|
||||
|
||||
await _record_session_event(
|
||||
transcription = await transcribe_voice_audio(
|
||||
audio_bytes=audio_bytes,
|
||||
file_name=file_name,
|
||||
mime_type=mime_type,
|
||||
transcript_hint=transcript_hint,
|
||||
)
|
||||
session, turn = await _create_pending_turn(
|
||||
db,
|
||||
session_id=session.id,
|
||||
turn_id=turn.id,
|
||||
event_type="turn_received",
|
||||
status="received",
|
||||
message="Voice turn fallback text received.",
|
||||
metadata={"turn_index": turn.turn_index},
|
||||
session=session,
|
||||
transcript_text=transcription.transcript_text,
|
||||
transcript_confidence=transcription.confidence,
|
||||
transcription_provider=transcription.provider,
|
||||
user_audio_path=user_audio_path,
|
||||
user_audio_mime_type=mime_type,
|
||||
user_audio_duration_ms=duration_ms,
|
||||
)
|
||||
await _record_session_event(
|
||||
status = await _process_pending_turn(
|
||||
db,
|
||||
session_id=session.id,
|
||||
turn_id=turn.id,
|
||||
event_type="turn_transcribed",
|
||||
status="succeeded",
|
||||
message="Fallback transcript accepted.",
|
||||
metadata={"transcript_confidence": turn.transcript_confidence},
|
||||
session=session,
|
||||
turn=turn,
|
||||
transcript_text=transcription.transcript_text,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
assistant_text: str | None = None
|
||||
assistant_result: StoryOutput | None = None
|
||||
|
||||
try:
|
||||
await _record_session_event(
|
||||
db,
|
||||
session_id=session.id,
|
||||
turn_id=turn.id,
|
||||
event_type="intent_resolved",
|
||||
status="succeeded",
|
||||
message="Turn intent resolved.",
|
||||
metadata={
|
||||
"detected_intent": detected_intent,
|
||||
"intent_confidence": intent_confidence,
|
||||
},
|
||||
)
|
||||
|
||||
if detected_intent == "save_story":
|
||||
assistant_text = "好的,这个故事已经准备好保存到故事库了。"
|
||||
elif detected_intent == "end_story":
|
||||
assistant_text = "好的,我们先把故事停在这里。想保存的话,现在就可以保存到故事库。"
|
||||
else:
|
||||
assistant_result = await _generate_assistant_turn(
|
||||
db,
|
||||
session=session,
|
||||
transcript_text=transcript_text,
|
||||
intent=detected_intent,
|
||||
)
|
||||
assistant_text = assistant_result.story_text.strip()
|
||||
|
||||
merged_state, story_patch = _merge_story_state(
|
||||
session,
|
||||
transcript_text=transcript_text,
|
||||
intent=detected_intent,
|
||||
assistant_result=assistant_result,
|
||||
)
|
||||
turn.story_patch = story_patch
|
||||
turn.assistant_text = assistant_text
|
||||
turn.status = "narrative_ready"
|
||||
session.story_state = merged_state
|
||||
session.latest_assistant_text = assistant_text
|
||||
session.status = "waiting_user"
|
||||
if assistant_result and assistant_result.title and not session.working_title:
|
||||
session.working_title = assistant_result.title
|
||||
await db.commit()
|
||||
await db.refresh(session)
|
||||
await db.refresh(turn)
|
||||
|
||||
await _record_session_event(
|
||||
db,
|
||||
session_id=session.id,
|
||||
turn_id=turn.id,
|
||||
event_type="story_patch_applied",
|
||||
status="succeeded",
|
||||
message="Story state updated after one turn.",
|
||||
metadata=story_patch,
|
||||
)
|
||||
await _record_session_event(
|
||||
db,
|
||||
session_id=session.id,
|
||||
turn_id=turn.id,
|
||||
event_type="assistant_text_ready",
|
||||
status="succeeded",
|
||||
message="Assistant text response generated.",
|
||||
metadata={
|
||||
"assistant_text_length": len(assistant_text or ""),
|
||||
"working_title": session.working_title,
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
turn.status = "failed"
|
||||
turn.error_message = str(exc)
|
||||
session.status = "waiting_user"
|
||||
session.last_error = str(exc)
|
||||
await db.commit()
|
||||
await db.refresh(session)
|
||||
await db.refresh(turn)
|
||||
await _record_session_event(
|
||||
db,
|
||||
session_id=session.id,
|
||||
turn_id=turn.id,
|
||||
event_type="session_failed",
|
||||
status="failed",
|
||||
message="Assistant narrative generation failed for one voice turn.",
|
||||
metadata={"error": str(exc), "turn_index": turn.turn_index},
|
||||
)
|
||||
logger.warning(
|
||||
"voice_turn_generation_failed",
|
||||
session_id=session.id,
|
||||
turn_id=turn.id,
|
||||
error=str(exc),
|
||||
)
|
||||
return VoiceTurnAcceptedResponse(
|
||||
turn_id=turn.id,
|
||||
session_id=session.id,
|
||||
status=turn.status,
|
||||
)
|
||||
|
||||
if assistant_text:
|
||||
try:
|
||||
audio_bytes = await text_to_speech(
|
||||
assistant_text,
|
||||
db=db,
|
||||
user_id=user_id,
|
||||
)
|
||||
saved_path = write_session_audio(
|
||||
build_turn_assistant_audio_path(session.id, turn.turn_index),
|
||||
audio_bytes,
|
||||
)
|
||||
turn.assistant_audio_path = saved_path
|
||||
turn.assistant_audio_duration_ms = None
|
||||
turn.status = "audio_ready"
|
||||
await db.commit()
|
||||
await db.refresh(turn)
|
||||
await _record_session_event(
|
||||
db,
|
||||
session_id=session.id,
|
||||
turn_id=turn.id,
|
||||
event_type="assistant_audio_ready",
|
||||
status="succeeded",
|
||||
message="Assistant audio response generated.",
|
||||
metadata={"audio_path": saved_path},
|
||||
)
|
||||
except Exception as exc:
|
||||
turn.status = "narrative_ready"
|
||||
turn.error_message = None
|
||||
session.last_error = None
|
||||
await db.commit()
|
||||
await db.refresh(turn)
|
||||
await db.refresh(session)
|
||||
await _record_session_event(
|
||||
db,
|
||||
session_id=session.id,
|
||||
turn_id=turn.id,
|
||||
event_type="assistant_audio_failed",
|
||||
status="failed",
|
||||
message="Assistant audio generation failed, text response kept.",
|
||||
metadata={"error": str(exc)},
|
||||
)
|
||||
logger.warning(
|
||||
"voice_turn_audio_failed",
|
||||
session_id=session.id,
|
||||
turn_id=turn.id,
|
||||
error=str(exc),
|
||||
)
|
||||
|
||||
return VoiceTurnAcceptedResponse(
|
||||
return VoiceTurnUploadAcceptedResponse(
|
||||
turn_id=turn.id,
|
||||
session_id=session.id,
|
||||
status=turn.status,
|
||||
status=status,
|
||||
transcription_provider=transcription.provider,
|
||||
)
|
||||
|
||||
|
||||
@@ -636,6 +837,23 @@ async def get_voice_turn_audio_service(
|
||||
return read_session_audio(turn.assistant_audio_path)
|
||||
|
||||
|
||||
async def get_voice_turn_user_audio_service(
    session_id: str,
    turn_id: str,
    user_id: str,
    db: AsyncSession,
) -> tuple[bytes, str]:
    """Load the uploaded user audio of one owned turn.

    Returns:
        ``(audio_bytes, mime_type)``; the MIME type falls back to
        ``audio/webm`` when the turn has none stored.

    Raises:
        HTTPException: 404 when ``session_audio_exists`` reports that the
            turn's user audio is not available.
    """
    turn = await _get_owned_turn(db, session_id=session_id, turn_id=turn_id, user_id=user_id)
    stored_path = turn.user_audio_path
    if session_audio_exists(stored_path):
        audio = read_session_audio(stored_path)
        mime = turn.user_audio_mime_type or "audio/webm"
        return audio, mime
    raise HTTPException(status_code=404, detail="Uploaded user audio not found")
|
||||
|
||||
|
||||
async def finalize_voice_session_service(
|
||||
session_id: str,
|
||||
request: VoiceSessionFinalizeRequest,
|
||||
@@ -655,6 +873,7 @@ async def finalize_voice_session_service(
|
||||
raise HTTPException(status_code=409, detail="Voice session is not ready to finalize.")
|
||||
|
||||
session.status = "finalizing_story"
|
||||
session.updated_at = _utcnow()
|
||||
await db.commit()
|
||||
await db.refresh(session)
|
||||
await _record_session_event(
|
||||
@@ -695,6 +914,7 @@ async def finalize_voice_session_service(
|
||||
session.final_story_id = story.id
|
||||
session.status = "completed"
|
||||
session.last_error = None
|
||||
session.updated_at = _utcnow()
|
||||
await db.commit()
|
||||
await db.refresh(session)
|
||||
|
||||
@@ -728,6 +948,7 @@ async def abandon_voice_session_service(
|
||||
|
||||
session.status = "abandoned"
|
||||
session.last_error = request.reason
|
||||
session.updated_at = _utcnow()
|
||||
await db.commit()
|
||||
await db.refresh(session)
|
||||
|
||||
@@ -741,4 +962,9 @@ async def abandon_voice_session_service(
|
||||
metadata={"reason": request.reason},
|
||||
)
|
||||
await db.refresh(session)
|
||||
return _session_to_summary(session)
|
||||
latest_turn = await _get_latest_turn(db, session_id=session.id)
|
||||
return _session_to_summary(
|
||||
session,
|
||||
latest_turn=latest_turn,
|
||||
total_turns=session.current_turn_index,
|
||||
)
|
||||
|
||||
@@ -26,6 +26,38 @@ def build_turn_assistant_audio_path(session_id: str, turn_index: int) -> Path:
|
||||
return session_storage_dir(session_id) / f"turn-{turn_index:03d}-assistant.mp3"
|
||||
|
||||
|
||||
def _normalize_audio_suffix(file_name: str | None, mime_type: str | None) -> str:
|
||||
if file_name and "." in file_name:
|
||||
return file_name.rsplit(".", 1)[-1].lower()
|
||||
|
||||
if mime_type == "audio/webm":
|
||||
return "webm"
|
||||
if mime_type == "audio/wav":
|
||||
return "wav"
|
||||
if mime_type == "audio/mpeg":
|
||||
return "mp3"
|
||||
if mime_type == "audio/mp4":
|
||||
return "m4a"
|
||||
return "bin"
|
||||
|
||||
|
||||
def write_uploaded_user_audio(
    *,
    session_id: str,
    turn_index: int,
    file_name: str | None,
    mime_type: str | None,
    audio_data: bytes,
) -> str:
    """Store the audio uploaded for one user turn and return where it was saved.

    The file suffix comes from ``_normalize_audio_suffix(file_name, mime_type)``,
    the target path from ``build_turn_user_audio_path``, and the write itself is
    delegated to ``write_session_audio``, whose return value is passed through.
    """
    audio_suffix = _normalize_audio_suffix(file_name, mime_type)
    target_path = build_turn_user_audio_path(session_id, turn_index, audio_suffix)
    return write_session_audio(target_path, audio_data)
|
||||
|
||||
|
||||
def write_session_audio(path: Path, audio_data: bytes) -> str:
|
||||
"""Persist session audio bytes atomically and return the saved path."""
|
||||
|
||||
|
||||
134
backend/app/services/voice_transcription_service.py
Normal file
134
backend/app/services/voice_transcription_service.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Voice transcription helpers for co-creation sessions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from io import BytesIO
|
||||
|
||||
from fastapi import HTTPException
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class VoiceTranscriptionResult:
|
||||
"""Normalized transcription result for one uploaded voice turn."""
|
||||
|
||||
transcript_text: str
|
||||
confidence: float | None = None
|
||||
provider: str = "demo"
|
||||
|
||||
|
||||
def _normalize_transcript(transcript_text: str) -> str:
|
||||
return transcript_text.strip()
|
||||
|
||||
|
||||
async def _transcribe_demo(
    *,
    audio_bytes: bytes,
    mime_type: str | None,
    transcript_hint: str | None,
) -> VoiceTranscriptionResult:
    """Produce a transcript without calling any speech-to-text backend.

    The transcript is taken from the first source that yields non-empty text:

    1. ``transcript_hint`` after whitespace trimming;
    2. ``audio_bytes`` decoded as UTF-8 (undecodable bytes are dropped), but
       only when ``mime_type`` starts with ``text/``.

    Returns:
        A result with ``confidence=1.0`` and ``provider="demo"``.

    Raises:
        HTTPException: 503 when neither source yields any text.
    """
    transcript = _normalize_transcript(transcript_hint or "")

    # Only fall back to the payload when the hint is empty and the payload is text.
    if not transcript and mime_type and mime_type.startswith("text/"):
        decoded = audio_bytes.decode("utf-8", errors="ignore")
        transcript = _normalize_transcript(decoded)

    if not transcript:
        raise HTTPException(
            status_code=503,
            detail=(
                "当前环境未配置真实语音转写,请先使用文本共创模式,"
                "或在开发模式下提供 transcript_hint。"
            ),
        )

    return VoiceTranscriptionResult(
        transcript_text=transcript,
        confidence=1.0,
        provider="demo",
    )
|
||||
|
||||
|
||||
async def _transcribe_openai(
    *,
    audio_bytes: bytes,
    file_name: str,
    mime_type: str | None,
    transcript_hint: str | None,
) -> VoiceTranscriptionResult:
    """Transcribe uploaded audio through the OpenAI transcription API.

    Args:
        audio_bytes: Raw audio payload to send.
        file_name: Name attached to the in-memory file object given to the SDK.
        mime_type: Currently unused by this function.
        transcript_hint: Optional text; when non-empty it is stripped and sent
            as the request ``prompt``.

    Returns:
        A result with ``provider="openai"`` and ``confidence=None``.

    Raises:
        HTTPException: 503 when ``settings.openai_api_key`` is not set or the
            request fails; 502 when the returned text is empty after trimming.
    """
    if not settings.openai_api_key:
        raise HTTPException(
            status_code=503,
            detail="OPENAI_API_KEY 未配置,无法使用 OpenAI 语音转写。",
        )

    client = AsyncOpenAI(api_key=settings.openai_api_key)
    audio_file = BytesIO(audio_bytes)
    # The name is set on the in-memory file before it is handed to the SDK.
    audio_file.name = file_name

    prompt = transcript_hint.strip() if transcript_hint else None

    try:
        # A client is created per call, so close its HTTP resources as soon as
        # the request finishes instead of leaving that to garbage collection.
        async with client:
            response = await client.audio.transcriptions.create(
                model=settings.voice_transcription_model,
                file=audio_file,
                language=settings.voice_transcription_language,
                prompt=prompt,
            )
    except Exception as exc:
        # Boundary handler: log the provider failure and return a generic 503.
        logger.warning("voice_transcription_openai_failed", error=str(exc))
        raise HTTPException(
            status_code=503,
            detail="语音转写服务暂时不可用,请稍后重试。",
        ) from exc

    transcript_text = _normalize_transcript(getattr(response, "text", "") or "")
    if not transcript_text:
        raise HTTPException(status_code=502, detail="语音转写结果为空,请重试。")

    return VoiceTranscriptionResult(
        transcript_text=transcript_text,
        confidence=None,
        provider="openai",
    )
|
||||
|
||||
|
||||
async def transcribe_voice_audio(
    *,
    audio_bytes: bytes,
    file_name: str,
    mime_type: str | None,
    transcript_hint: str | None = None,
) -> VoiceTranscriptionResult:
    """Route one uploaded audio turn to the transcriber selected by settings.

    ``settings.voice_transcription_mode`` is trimmed and lowercased; an unset
    or empty value is treated as ``"demo"``.

    * ``"openai"`` delegates to ``_transcribe_openai``.
    * ``"disabled"`` raises ``HTTPException`` with status 503.
    * Any other value delegates to ``_transcribe_demo``.
    """
    configured_mode = settings.voice_transcription_mode or "demo"
    mode = configured_mode.strip().lower()

    if mode == "openai":
        return await _transcribe_openai(
            audio_bytes=audio_bytes,
            file_name=file_name,
            mime_type=mime_type,
            transcript_hint=transcript_hint,
        )

    if mode != "disabled":
        # Every remaining mode, including unknown values, uses the demo path.
        return await _transcribe_demo(
            audio_bytes=audio_bytes,
            mime_type=mime_type,
            transcript_hint=transcript_hint,
        )

    raise HTTPException(
        status_code=503,
        detail="当前环境已禁用语音转写,请先使用文本共创模式。",
    )
|
||||
@@ -22,6 +22,7 @@ dependencies = [
|
||||
"redis>=5.0.0",
|
||||
"edge-tts>=6.1.0",
|
||||
"openai>=1.0.0",
|
||||
"python-multipart>=0.0.9",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
@@ -199,3 +199,147 @@ async def test_voice_session_abandon_blocks_future_turns(
|
||||
assert response.status_code == 409
|
||||
finally:
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
async def test_voice_session_uploaded_audio_turn_uses_demo_transcript_hint(
    db_session,
    auth_token,
):
    """Upload an audio turn with a transcript hint and read the stored audio back.

    Story generation and text-to-speech are patched with ``AsyncMock``. The test
    creates a session, posts a multipart audio turn carrying ``transcript_hint``,
    then checks the accepted response, the turn detail and the user-audio download.
    """
    # Route the app's DB dependency to the test session.
    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db

    with (
        patch(
            "app.services.voice_session_service.generate_story_content",
            new_callable=AsyncMock,
        ) as mock_generate,
        patch(
            "app.services.voice_session_service.text_to_speech",
            new_callable=AsyncMock,
        ) as mock_tts,
    ):
        # Canned story and assistant audio returned by the patched services.
        mock_generate.return_value = StoryOutput(
            mode="generated",
            title="小鲸鱼找朋友",
            story_text="小鲸鱼在海面上遇见了一只会唱歌的海鸥。",
            cover_prompt_suggestion="温暖儿童绘本封面,小鲸鱼和海鸥",
        )
        mock_tts.return_value = b"fake-upload-audio"

        transport = ASGITransport(app=app)
        try:
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                client.cookies.set("access_token", auth_token)

                # Create the session that will receive the uploaded turn.
                response = await client.post("/api/voice-sessions", json={})
                assert response.status_code == 201
                session_id = response.json()["id"]

                # Multipart upload: audio file plus form fields, including the hint.
                response = await client.post(
                    f"/api/voice-sessions/{session_id}/turns",
                    files={
                        "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"),
                    },
                    data={
                        "duration_ms": "3200",
                        "transcript_hint": "我想听一个小鲸鱼找朋友的故事",
                    },
                )
                assert response.status_code == 202
                turn_data = response.json()
                assert turn_data["status"] == "audio_ready"
                # NOTE(review): presumably relies on the test environment using the
                # demo transcription mode — confirm against the test settings.
                assert turn_data["transcription_provider"] == "demo"

                # The turn detail should expose both user and assistant audio.
                turn_id = turn_data["turn_id"]
                response = await client.get(
                    f"/api/voice-sessions/{session_id}/turns/{turn_id}"
                )
                assert response.status_code == 200
                detail = response.json()
                assert detail["user_audio_ready"] is True
                assert detail["user_audio_url"].endswith("/user-audio")
                assert detail["transcription_provider"] == "demo"
                assert detail["assistant_audio_ready"] is True

                # The download must return the uploaded bytes and content type.
                response = await client.get(detail["user_audio_url"])
                assert response.status_code == 200
                assert response.content == b"fake-webm-audio"
                assert response.headers["content-type"] == "audio/webm"
        finally:
            # Always drop the dependency override so other tests are unaffected.
            app.dependency_overrides.clear()
|
||||
|
||||
|
||||
async def test_voice_session_list_orders_recent_sessions_first(
    db_session,
    auth_token,
):
    """List sessions after creating two and expect the later one first.

    Story generation and text-to-speech are patched with ``AsyncMock``. Each
    session gets one fallback text turn before the list endpoint is queried,
    once with ``limit=8`` and once with ``active_only=true``.
    """
    # Route the app's DB dependency to the test session.
    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db

    with (
        patch(
            "app.services.voice_session_service.generate_story_content",
            new_callable=AsyncMock,
        ) as mock_generate,
        patch(
            "app.services.voice_session_service.text_to_speech",
            new_callable=AsyncMock,
        ) as mock_tts,
    ):
        # One canned story and one audio payload per turn, consumed in order.
        mock_generate.side_effect = [
            StoryOutput(
                mode="generated",
                title="第一场冒险",
                story_text="第一段故事。",
                cover_prompt_suggestion="封面一",
            ),
            StoryOutput(
                mode="generated",
                title="第二场冒险",
                story_text="第二段故事。",
                cover_prompt_suggestion="封面二",
            ),
        ]
        mock_tts.side_effect = [b"audio-1", b"audio-2"]

        transport = ASGITransport(app=app)
        try:
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                client.cookies.set("access_token", auth_token)

                # First session with a single fallback (text) turn.
                response = await client.post("/api/voice-sessions", json={})
                first_session_id = response.json()["id"]
                await client.post(
                    f"/api/voice-sessions/{first_session_id}/turns/fallback",
                    json={"transcript_text": "第一个故事"},
                )

                # Second session, created and updated after the first one.
                response = await client.post("/api/voice-sessions", json={})
                second_session_id = response.json()["id"]
                await client.post(
                    f"/api/voice-sessions/{second_session_id}/turns/fallback",
                    json={"transcript_text": "第二个故事"},
                )

                # The later session must come first, with its turn summary filled in.
                response = await client.get("/api/voice-sessions?limit=8")
                assert response.status_code == 200
                sessions = response.json()
                assert len(sessions) >= 2
                assert sessions[0]["id"] == second_session_id
                assert sessions[1]["id"] == first_session_id
                assert sessions[0]["total_turns"] == 1
                assert sessions[0]["last_turn_status"] == "audio_ready"

                # Both sessions must appear in the active-only listing.
                response = await client.get("/api/voice-sessions?active_only=true")
                assert response.status_code == 200
                active_sessions = response.json()
                assert {item["id"] for item in active_sessions} >= {
                    first_session_id,
                    second_session_id,
                }
        finally:
            # Always drop the dependency override so other tests are unaffected.
            app.dependency_overrides.clear()
|
||||
|
||||
Reference in New Issue
Block a user