feat: add voice studio prototype flow
This commit is contained in:
@@ -1,6 +1,15 @@
|
|||||||
"""Voice co-creation session APIs."""
|
"""Voice co-creation session APIs."""
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, Response, status
|
from fastapi import (
|
||||||
|
APIRouter,
|
||||||
|
Depends,
|
||||||
|
File,
|
||||||
|
Form,
|
||||||
|
Query,
|
||||||
|
Response,
|
||||||
|
UploadFile,
|
||||||
|
status,
|
||||||
|
)
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.core.deps import require_user
|
from app.core.deps import require_user
|
||||||
@@ -17,15 +26,19 @@ from app.schemas.voice_session_schemas import (
|
|||||||
VoiceTurnAcceptedResponse,
|
VoiceTurnAcceptedResponse,
|
||||||
VoiceTurnCreateFallbackRequest,
|
VoiceTurnCreateFallbackRequest,
|
||||||
VoiceTurnSummaryResponse,
|
VoiceTurnSummaryResponse,
|
||||||
|
VoiceTurnUploadAcceptedResponse,
|
||||||
)
|
)
|
||||||
from app.services.voice_session_service import (
|
from app.services.voice_session_service import (
|
||||||
abandon_voice_session_service,
|
abandon_voice_session_service,
|
||||||
create_voice_session_service,
|
create_voice_session_service,
|
||||||
create_voice_turn_from_text_service,
|
create_voice_turn_from_text_service,
|
||||||
|
create_voice_turn_from_upload_service,
|
||||||
finalize_voice_session_service,
|
finalize_voice_session_service,
|
||||||
get_voice_session_detail_service,
|
get_voice_session_detail_service,
|
||||||
get_voice_turn_audio_service,
|
get_voice_turn_audio_service,
|
||||||
get_voice_turn_service,
|
get_voice_turn_service,
|
||||||
|
get_voice_turn_user_audio_service,
|
||||||
|
list_voice_sessions_service,
|
||||||
)
|
)
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
@@ -53,6 +66,22 @@ async def create_voice_session(
|
|||||||
return await create_voice_session_service(request, user.id, db)
|
return await create_voice_session_service(request, user.id, db)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/voice-sessions", response_model=list[VoiceSessionSummaryResponse])
async def list_voice_sessions(
    limit: int = Query(default=8, ge=1, le=20),
    active_only: bool = Query(default=False),
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """List recent voice co-creation sessions for restore/resume behavior."""
    # `limit` is bounded to 1..20 by the Query declaration above, so the
    # service receives an already-validated page size. `active_only` is
    # passed through untouched; the service decides which statuses qualify.
    session_summaries = await list_voice_sessions_service(
        user.id,
        db,
        limit=limit,
        active_only=active_only,
    )
    return session_summaries
|
||||||
|
|
||||||
|
|
||||||
@router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse)
|
@router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse)
|
||||||
async def get_voice_session(
|
async def get_voice_session(
|
||||||
session_id: str,
|
session_id: str,
|
||||||
@@ -83,6 +112,38 @@ async def create_voice_turn_from_text(
|
|||||||
return await create_voice_turn_from_text_service(session_id, request, user.id, db)
|
return await create_voice_turn_from_text_service(session_id, request, user.id, db)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
    "/voice-sessions/{session_id}/turns",
    response_model=VoiceTurnUploadAcceptedResponse,
    status_code=status.HTTP_202_ACCEPTED,
)
async def create_voice_turn_from_upload(
    session_id: str,
    audio_file: UploadFile = File(...),
    duration_ms: int | None = Form(default=None),
    transcript_hint: str | None = Form(default=None),
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Create one turn from uploaded audio and configured ASR behavior."""
    # Imported locally so this handler does not depend on the module's
    # top-level import list, which does not include HTTPException.
    from fastapi import HTTPException

    # Apply the per-user rate limit before reading the upload body so a
    # throttled caller does not cost us the read.
    await check_rate_limit(
        f"voice-turn:{user.id}",
        VOICE_SESSION_RATE_LIMIT_REQUESTS,
        VOICE_SESSION_RATE_LIMIT_WINDOW,
    )

    audio_bytes = await audio_file.read()
    # A zero-byte upload carries nothing to store or transcribe; reject it
    # here instead of persisting an empty file and creating a turn for it.
    if not audio_bytes:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Uploaded audio file is empty.",
        )

    return await create_voice_turn_from_upload_service(
        session_id=session_id,
        user_id=user.id,
        audio_bytes=audio_bytes,
        # Clients may omit the filename; fall back to a fixed default name.
        file_name=audio_file.filename or "voice-turn.webm",
        mime_type=audio_file.content_type,
        duration_ms=duration_ms,
        transcript_hint=transcript_hint,
        db=db,
    )
|
||||||
|
|
||||||
|
|
||||||
@router.get(
|
@router.get(
|
||||||
"/voice-sessions/{session_id}/turns/{turn_id}",
|
"/voice-sessions/{session_id}/turns/{turn_id}",
|
||||||
response_model=VoiceTurnSummaryResponse,
|
response_model=VoiceTurnSummaryResponse,
|
||||||
@@ -109,6 +170,23 @@ async def get_voice_turn_audio(
|
|||||||
return Response(content=audio_bytes, media_type="audio/mpeg")
|
return Response(content=audio_bytes, media_type="audio/mpeg")
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/voice-sessions/{session_id}/turns/{turn_id}/user-audio")
async def get_voice_turn_user_audio(
    session_id: str,
    turn_id: str,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Get uploaded user audio for one voice turn."""
    audio_bytes, mime_type = await get_voice_turn_user_audio_service(
        session_id,
        turn_id,
        user.id,
        db,
    )
    # The MIME type recorded at upload time is optional (the upload route
    # forwards the client's content type, which may be absent). Serve a
    # generic binary type rather than a response with no Content-Type.
    # NOTE(review): the service may already substitute a default — confirm.
    return Response(
        content=audio_bytes,
        media_type=mime_type or "application/octet-stream",
    )
|
||||||
|
|
||||||
|
|
||||||
@router.post(
|
@router.post(
|
||||||
"/voice-sessions/{session_id}/finalize",
|
"/voice-sessions/{session_id}/finalize",
|
||||||
response_model=VoiceSessionFinalizeResponse,
|
response_model=VoiceSessionFinalizeResponse,
|
||||||
|
|||||||
@@ -70,6 +70,18 @@ class Settings(BaseSettings):
|
|||||||
"storage/voice_sessions",
|
"storage/voice_sessions",
|
||||||
description="Directory for persisted voice co-creation session assets",
|
description="Directory for persisted voice co-creation session assets",
|
||||||
)
|
)
|
||||||
|
voice_transcription_mode: str = Field(
|
||||||
|
"demo",
|
||||||
|
description="Voice transcription mode: demo, openai, or disabled",
|
||||||
|
)
|
||||||
|
voice_transcription_model: str = Field(
|
||||||
|
"gpt-4o-mini-transcribe",
|
||||||
|
description="Model used when voice transcription mode is OpenAI-backed",
|
||||||
|
)
|
||||||
|
voice_transcription_language: str = Field(
|
||||||
|
"zh",
|
||||||
|
description="Preferred language hint for voice transcription",
|
||||||
|
)
|
||||||
story_audio_cache_ttl_days: int = Field(
|
story_audio_cache_ttl_days: int = Field(
|
||||||
30,
|
30,
|
||||||
description="TTL in days before cached story audio is pruned",
|
description="TTL in days before cached story audio is pruned",
|
||||||
|
|||||||
@@ -25,6 +25,15 @@ class VoiceTurnCreateFallbackRequest(BaseModel):
|
|||||||
duration_ms: int | None = Field(default=None, ge=1, le=MAX_VOICE_TURN_DURATION_MS)
|
duration_ms: int | None = Field(default=None, ge=1, le=MAX_VOICE_TURN_DURATION_MS)
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceTurnUploadAcceptedResponse(BaseModel):
    """Accepted response for one uploaded-audio voice turn."""

    # Identifier of the turn this response refers to.
    turn_id: str
    # Identifier of the voice session the turn belongs to.
    session_id: str
    # Turn status as a plain string; no value set is enforced by this model.
    status: str
    # Label of the transcription provider; optional and defaults to None.
    transcription_provider: str | None = None
|
||||||
|
|
||||||
|
|
||||||
class VoiceSessionFinalizeRequest(BaseModel):
|
class VoiceSessionFinalizeRequest(BaseModel):
|
||||||
"""Finalize one voice session into a persisted story."""
|
"""Finalize one voice session into a persisted story."""
|
||||||
|
|
||||||
@@ -61,11 +70,14 @@ class VoiceTurnSummaryResponse(BaseModel):
|
|||||||
status: str
|
status: str
|
||||||
user_transcript: str | None = None
|
user_transcript: str | None = None
|
||||||
transcript_confidence: float | None = None
|
transcript_confidence: float | None = None
|
||||||
|
transcription_provider: str | None = None
|
||||||
detected_intent: str
|
detected_intent: str
|
||||||
intent_confidence: float | None = None
|
intent_confidence: float | None = None
|
||||||
assistant_text: str | None = None
|
assistant_text: str | None = None
|
||||||
assistant_audio_ready: bool = False
|
assistant_audio_ready: bool = False
|
||||||
assistant_audio_url: str | None = None
|
assistant_audio_url: str | None = None
|
||||||
|
user_audio_ready: bool = False
|
||||||
|
user_audio_url: str | None = None
|
||||||
error_message: str | None = None
|
error_message: str | None = None
|
||||||
created_at: datetime
|
created_at: datetime
|
||||||
updated_at: datetime
|
updated_at: datetime
|
||||||
@@ -81,10 +93,14 @@ class VoiceSessionSummaryResponse(BaseModel):
|
|||||||
target_mode: str
|
target_mode: str
|
||||||
status: str
|
status: str
|
||||||
current_turn_index: int
|
current_turn_index: int
|
||||||
|
total_turns: int = 0
|
||||||
working_title: str | None = None
|
working_title: str | None = None
|
||||||
story_state: dict[str, Any] = Field(default_factory=dict)
|
story_state: dict[str, Any] = Field(default_factory=dict)
|
||||||
latest_user_transcript: str | None = None
|
latest_user_transcript: str | None = None
|
||||||
latest_assistant_text: str | None = None
|
latest_assistant_text: str | None = None
|
||||||
|
latest_detected_intent: str | None = None
|
||||||
|
latest_assistant_audio_ready: bool = False
|
||||||
|
last_turn_status: str | None = None
|
||||||
can_continue: bool = False
|
can_continue: bool = False
|
||||||
can_finalize: bool = False
|
can_finalize: bool = False
|
||||||
last_error: str | None = None
|
last_error: str | None = None
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
@@ -20,24 +21,23 @@ from app.schemas.voice_session_schemas import (
|
|||||||
VoiceTurnAcceptedResponse,
|
VoiceTurnAcceptedResponse,
|
||||||
VoiceTurnCreateFallbackRequest,
|
VoiceTurnCreateFallbackRequest,
|
||||||
VoiceTurnSummaryResponse,
|
VoiceTurnSummaryResponse,
|
||||||
|
VoiceTurnUploadAcceptedResponse,
|
||||||
)
|
)
|
||||||
from app.services.adapters.text.models import StoryOutput
|
from app.services.adapters.text.models import StoryOutput
|
||||||
from app.services.memory_service import build_enhanced_memory_context
|
from app.services.memory_service import build_enhanced_memory_context
|
||||||
from app.services.provider_router import generate_story_content, text_to_speech
|
from app.services.provider_router import generate_story_content, text_to_speech
|
||||||
from app.services.story_service import (
|
from app.services.story_service import create_story_from_result, validate_profile_and_universe
|
||||||
create_story_from_result,
|
|
||||||
validate_profile_and_universe,
|
|
||||||
)
|
|
||||||
from app.services.voice_session_storage import (
|
from app.services.voice_session_storage import (
|
||||||
build_turn_assistant_audio_path,
|
build_turn_assistant_audio_path,
|
||||||
read_session_audio,
|
read_session_audio,
|
||||||
session_audio_exists,
|
session_audio_exists,
|
||||||
write_session_audio,
|
write_session_audio,
|
||||||
|
write_uploaded_user_audio,
|
||||||
)
|
)
|
||||||
|
from app.services.voice_transcription_service import transcribe_voice_audio
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
ACTIVE_SESSION_STATUSES = {"draft", "active", "processing_turn", "waiting_user"}
|
|
||||||
CONTINUABLE_SESSION_STATUSES = {"draft", "active", "waiting_user"}
|
CONTINUABLE_SESSION_STATUSES = {"draft", "active", "waiting_user"}
|
||||||
FINAL_SESSION_STATUSES = {"completed", "abandoned"}
|
FINAL_SESSION_STATUSES = {"completed", "abandoned"}
|
||||||
|
|
||||||
@@ -62,13 +62,24 @@ def _session_can_finalize(session: VoiceSession) -> bool:
|
|||||||
return bool(segments) and session.status in {"active", "waiting_user"}
|
return bool(segments) and session.status in {"active", "waiting_user"}
|
||||||
|
|
||||||
|
|
||||||
|
def _utcnow() -> datetime:
|
||||||
|
return datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
def _assistant_audio_url(session_id: str, turn_id: str, audio_path: str | None) -> str | None:
    """Build the API path for a turn's assistant audio.

    Returns None when ``session_audio_exists`` reports no audio at
    ``audio_path``; otherwise returns the turn's ``/audio`` route.
    """
    if session_audio_exists(audio_path):
        return f"/api/voice-sessions/{session_id}/turns/{turn_id}/audio"
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _user_audio_url(session_id: str, turn_id: str, audio_path: str | None) -> str | None:
    """Build the API path for a turn's uploaded user audio.

    Returns None when ``session_audio_exists`` reports no audio at
    ``audio_path``; otherwise returns the turn's ``/user-audio`` route.
    """
    if session_audio_exists(audio_path):
        return f"/api/voice-sessions/{session_id}/turns/{turn_id}/user-audio"
    return None
|
||||||
|
|
||||||
|
|
||||||
def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
|
def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
|
||||||
|
turn_patch = turn.story_patch or {}
|
||||||
return VoiceTurnSummaryResponse(
|
return VoiceTurnSummaryResponse(
|
||||||
id=turn.id,
|
id=turn.id,
|
||||||
session_id=turn.session_id,
|
session_id=turn.session_id,
|
||||||
@@ -76,6 +87,7 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
|
|||||||
status=turn.status,
|
status=turn.status,
|
||||||
user_transcript=turn.user_transcript,
|
user_transcript=turn.user_transcript,
|
||||||
transcript_confidence=turn.transcript_confidence,
|
transcript_confidence=turn.transcript_confidence,
|
||||||
|
transcription_provider=turn_patch.get("transcription_provider"),
|
||||||
detected_intent=turn.detected_intent,
|
detected_intent=turn.detected_intent,
|
||||||
intent_confidence=turn.intent_confidence,
|
intent_confidence=turn.intent_confidence,
|
||||||
assistant_text=turn.assistant_text,
|
assistant_text=turn.assistant_text,
|
||||||
@@ -85,13 +97,25 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
|
|||||||
turn.id,
|
turn.id,
|
||||||
turn.assistant_audio_path,
|
turn.assistant_audio_path,
|
||||||
),
|
),
|
||||||
|
user_audio_ready=session_audio_exists(turn.user_audio_path),
|
||||||
|
user_audio_url=_user_audio_url(turn.session_id, turn.id, turn.user_audio_path),
|
||||||
error_message=turn.error_message,
|
error_message=turn.error_message,
|
||||||
created_at=turn.created_at,
|
created_at=turn.created_at,
|
||||||
updated_at=turn.updated_at,
|
updated_at=turn.updated_at,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _session_to_summary(session: VoiceSession) -> VoiceSessionSummaryResponse:
|
def _session_to_summary(
|
||||||
|
session: VoiceSession,
|
||||||
|
*,
|
||||||
|
latest_turn: VoiceTurn | None = None,
|
||||||
|
total_turns: int | None = None,
|
||||||
|
) -> VoiceSessionSummaryResponse:
|
||||||
|
if latest_turn is None:
|
||||||
|
total_turns = total_turns if total_turns is not None else session.current_turn_index
|
||||||
|
else:
|
||||||
|
total_turns = total_turns if total_turns is not None else latest_turn.turn_index
|
||||||
|
|
||||||
return VoiceSessionSummaryResponse(
|
return VoiceSessionSummaryResponse(
|
||||||
id=session.id,
|
id=session.id,
|
||||||
child_profile_id=session.child_profile_id,
|
child_profile_id=session.child_profile_id,
|
||||||
@@ -100,10 +124,16 @@ def _session_to_summary(session: VoiceSession) -> VoiceSessionSummaryResponse:
|
|||||||
target_mode=session.target_mode,
|
target_mode=session.target_mode,
|
||||||
status=session.status,
|
status=session.status,
|
||||||
current_turn_index=session.current_turn_index,
|
current_turn_index=session.current_turn_index,
|
||||||
|
total_turns=total_turns or 0,
|
||||||
working_title=session.working_title,
|
working_title=session.working_title,
|
||||||
story_state=session.story_state or {},
|
story_state=session.story_state or {},
|
||||||
latest_user_transcript=session.latest_user_transcript,
|
latest_user_transcript=session.latest_user_transcript,
|
||||||
latest_assistant_text=session.latest_assistant_text,
|
latest_assistant_text=session.latest_assistant_text,
|
||||||
|
latest_detected_intent=latest_turn.detected_intent if latest_turn else None,
|
||||||
|
latest_assistant_audio_ready=(
|
||||||
|
session_audio_exists(latest_turn.assistant_audio_path) if latest_turn else False
|
||||||
|
),
|
||||||
|
last_turn_status=latest_turn.status if latest_turn else None,
|
||||||
can_continue=_session_can_continue(session),
|
can_continue=_session_can_continue(session),
|
||||||
can_finalize=_session_can_finalize(session),
|
can_finalize=_session_can_finalize(session),
|
||||||
last_error=session.last_error,
|
last_error=session.last_error,
|
||||||
@@ -154,6 +184,20 @@ async def _get_owned_session(
|
|||||||
return session
|
return session
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_latest_turn(
    db: AsyncSession,
    *,
    session_id: str,
) -> VoiceTurn | None:
    """Fetch the turn with the highest ``turn_index`` for one session.

    Returns None when the session has no turns.
    """
    newest_first = desc(VoiceTurn.turn_index)
    stmt = (
        select(VoiceTurn)
        .where(VoiceTurn.session_id == session_id)
        .order_by(newest_first)
        .limit(1)
    )
    rows = await db.execute(stmt)
    return rows.scalar_one_or_none()
|
||||||
|
|
||||||
|
|
||||||
async def _get_owned_turn(
|
async def _get_owned_turn(
|
||||||
db: AsyncSession,
|
db: AsyncSession,
|
||||||
*,
|
*,
|
||||||
@@ -307,6 +351,283 @@ def _merge_story_state(
|
|||||||
return current_state, patch
|
return current_state, patch
|
||||||
|
|
||||||
|
|
||||||
|
async def _create_pending_turn(
    db: AsyncSession,
    *,
    session: VoiceSession,
    transcript_text: str,
    transcript_confidence: float | None,
    transcription_provider: str | None,
    user_audio_path: str | None = None,
    user_audio_mime_type: str | None = None,
    user_audio_duration_ms: int | None = None,
) -> tuple[VoiceSession, VoiceTurn]:
    """Persist a new turn for ``session`` and record its intake events.

    The turn is stored with status ``"transcribing"`` and the session is
    moved to ``"processing_turn"`` with its turn counter advanced by one.
    After the commit, a ``turn_received`` event, an optional
    ``turn_audio_uploaded`` event (only when ``user_audio_path`` is truthy)
    and a ``turn_transcribed`` event are recorded, in that order.

    Raises:
        HTTPException: 409 when the session status is not in
            ``CONTINUABLE_SESSION_STATUSES``.

    Returns:
        The refreshed ``(session, turn)`` pair.
    """
    session_is_continuable = session.status in CONTINUABLE_SESSION_STATUSES
    if not session_is_continuable:
        raise HTTPException(
            status_code=409,
            detail="Voice session is not ready for another turn.",
        )

    # Intent detection sees the index *before* this turn is counted.
    prior_index = session.current_turn_index
    upcoming_index = prior_index + 1
    intent_label, intent_score = _detect_intent(
        transcript_text,
        current_turn_index=prior_index,
    )

    pending_turn = VoiceTurn(
        session_id=session.id,
        turn_index=upcoming_index,
        status="transcribing",
        user_audio_path=user_audio_path,
        user_audio_mime_type=user_audio_mime_type,
        user_audio_duration_ms=user_audio_duration_ms,
        user_transcript=transcript_text,
        transcript_confidence=transcript_confidence,
        detected_intent=intent_label,
        intent_confidence=intent_score,
        story_patch={"transcription_provider": transcription_provider},
    )

    session.status = "processing_turn"
    session.current_turn_index = upcoming_index
    session.latest_user_transcript = transcript_text
    session.last_error = None
    session.updated_at = _utcnow()
    db.add(pending_turn)
    await db.commit()
    await db.refresh(session)
    await db.refresh(pending_turn)

    async def _emit(event_type: str, outcome: str, message: str, metadata: dict) -> None:
        # Every intake event targets the same session/turn pair.
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=pending_turn.id,
            event_type=event_type,
            status=outcome,
            message=message,
            metadata=metadata,
        )

    await _emit(
        "turn_received",
        "received",
        "Voice turn received.",
        {
            "turn_index": pending_turn.turn_index,
            "has_user_audio": bool(user_audio_path),
            "transcription_provider": transcription_provider,
        },
    )
    if user_audio_path:
        await _emit(
            "turn_audio_uploaded",
            "succeeded",
            "User audio uploaded for one voice turn.",
            {
                "mime_type": user_audio_mime_type,
                "audio_path": user_audio_path,
            },
        )
    await _emit(
        "turn_transcribed",
        "succeeded",
        "Voice turn transcript is available.",
        {
            "transcript_confidence": transcript_confidence,
            "transcription_provider": transcription_provider,
        },
    )
    return session, pending_turn
|
||||||
|
|
||||||
|
|
||||||
|
async def _process_pending_turn(
    db: AsyncSession,
    *,
    session: VoiceSession,
    turn: VoiceTurn,
    transcript_text: str,
    user_id: str,
) -> str:
    """Produce the assistant reply for one pending turn and return its status.

    Runs in two phases:

    1. Narrative: resolve the reply text (a canned sentence for the
       ``save_story`` / ``end_story`` intents, otherwise generated via
       ``_generate_assistant_turn``), merge the story state, commit, and
       record events. Any exception marks the turn ``"failed"``, returns the
       session to ``"waiting_user"`` with the error text stored, and ends
       the function early.
    2. Audio: when reply text exists, synthesize speech and store it. A
       failure here is non-fatal: the turn stays ``"narrative_ready"`` and
       an ``assistant_audio_failed`` event is recorded.

    Returns:
        The final ``turn.status``: ``"failed"``, ``"narrative_ready"`` or
        ``"audio_ready"``.
    """
    assistant_text: str | None = None
    assistant_result: StoryOutput | None = None
    # Intent was already detected and stored when the turn was created.
    detected_intent = turn.detected_intent
    intent_confidence = turn.intent_confidence

    try:
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="intent_resolved",
            status="succeeded",
            message="Turn intent resolved.",
            metadata={
                "detected_intent": detected_intent,
                "intent_confidence": intent_confidence,
            },
        )

        # Save/end intents get a fixed reply and skip story generation, so
        # assistant_result stays None for them.
        if detected_intent == "save_story":
            assistant_text = "好的,这个故事已经准备好保存到故事库了。"
        elif detected_intent == "end_story":
            assistant_text = "好的,我们先把故事停在这里。想保存的话,现在就可以保存到故事库。"
        else:
            assistant_result = await _generate_assistant_turn(
                db,
                session=session,
                transcript_text=transcript_text,
                intent=detected_intent,
            )
            assistant_text = assistant_result.story_text.strip()

        merged_state, story_patch = _merge_story_state(
            session,
            transcript_text=transcript_text,
            intent=detected_intent,
            assistant_result=assistant_result,
        )
        # Carry over the provider label stored on the turn at creation time;
        # the freshly built patch replaces turn.story_patch just below.
        story_patch["transcription_provider"] = (
            (turn.story_patch or {}).get("transcription_provider")
        )
        turn.story_patch = story_patch
        turn.assistant_text = assistant_text
        turn.status = "narrative_ready"
        session.story_state = merged_state
        session.latest_assistant_text = assistant_text
        session.status = "waiting_user"
        session.updated_at = _utcnow()
        # Only adopt a generated title when the session has none yet.
        if assistant_result and assistant_result.title and not session.working_title:
            session.working_title = assistant_result.title
        await db.commit()
        await db.refresh(session)
        await db.refresh(turn)

        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="story_patch_applied",
            status="succeeded",
            message="Story state updated after one turn.",
            metadata=story_patch,
        )
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="assistant_text_ready",
            status="succeeded",
            message="Assistant text response generated.",
            metadata={
                "assistant_text_length": len(assistant_text or ""),
                "working_title": session.working_title,
            },
        )
    except Exception as exc:
        # Narrative failure: mark the turn failed but hand the session back
        # to the user so a new turn can be attempted.
        # NOTE(review): no rollback is issued before this commit. If the
        # exception came from the database layer, this commit may itself
        # fail and leave the session in "processing_turn" — confirm whether
        # a rollback is needed here.
        # NOTE(review): str(exc) is stored in fields that the response
        # schemas expose (error_message / last_error) — confirm that is
        # acceptable for internal errors.
        turn.status = "failed"
        turn.error_message = str(exc)
        session.status = "waiting_user"
        session.last_error = str(exc)
        session.updated_at = _utcnow()
        await db.commit()
        await db.refresh(session)
        await db.refresh(turn)
        await _record_session_event(
            db,
            session_id=session.id,
            turn_id=turn.id,
            event_type="session_failed",
            status="failed",
            message="Assistant narrative generation failed for one voice turn.",
            metadata={"error": str(exc), "turn_index": turn.turn_index},
        )
        logger.warning(
            "voice_turn_generation_failed",
            session_id=session.id,
            turn_id=turn.id,
            error=str(exc),
        )
        return turn.status

    # Audio synthesis is best-effort and only attempted for non-empty text.
    if assistant_text:
        try:
            audio_bytes = await text_to_speech(
                assistant_text,
                db=db,
                user_id=user_id,
            )
            saved_path = write_session_audio(
                build_turn_assistant_audio_path(session.id, turn.turn_index),
                audio_bytes,
            )
            turn.assistant_audio_path = saved_path
            # Duration is not computed here; it is stored as None.
            turn.assistant_audio_duration_ms = None
            turn.status = "audio_ready"
            await db.commit()
            await db.refresh(turn)
            await _record_session_event(
                db,
                session_id=session.id,
                turn_id=turn.id,
                event_type="assistant_audio_ready",
                status="succeeded",
                message="Assistant audio response generated.",
                metadata={"audio_path": saved_path},
            )
        except Exception as exc:
            # Audio failure keeps the text reply: reset the turn to
            # "narrative_ready" and clear error fields so the turn is not
            # reported as failed.
            # NOTE(review): as above, no rollback precedes this commit.
            turn.status = "narrative_ready"
            turn.error_message = None
            session.last_error = None
            session.updated_at = _utcnow()
            await db.commit()
            await db.refresh(turn)
            await db.refresh(session)
            await _record_session_event(
                db,
                session_id=session.id,
                turn_id=turn.id,
                event_type="assistant_audio_failed",
                status="failed",
                message="Assistant audio generation failed, text response kept.",
                metadata={"error": str(exc)},
            )
            logger.warning(
                "voice_turn_audio_failed",
                session_id=session.id,
                turn_id=turn.id,
                error=str(exc),
            )

    return turn.status
|
||||||
|
|
||||||
|
|
||||||
|
async def list_voice_sessions_service(
    user_id: str,
    db: AsyncSession,
    *,
    limit: int = 8,
    active_only: bool = False,
) -> list[VoiceSessionSummaryResponse]:
    """Return summaries of a user's voice sessions, most recently updated first.

    Args:
        user_id: Owner whose sessions are listed.
        db: Async database session used for all queries.
        limit: Maximum number of sessions to return.
        active_only: When true, keep only sessions whose status is in
            ``CONTINUABLE_SESSION_STATUSES``.

    Returns:
        One summary per session, each enriched with that session's latest
        turn (looked up with one query per session).
    """
    stmt = select(VoiceSession).where(VoiceSession.user_id == user_id)
    if active_only:
        stmt = stmt.where(VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES))
    stmt = stmt.order_by(
        desc(VoiceSession.updated_at),
        desc(VoiceSession.created_at),
    ).limit(limit)

    rows = await db.execute(stmt)
    owned_sessions = rows.scalars().all()

    # Latest-turn lookups run sequentially, in the same order as the rows.
    return [
        _session_to_summary(
            item,
            latest_turn=await _get_latest_turn(db, session_id=item.id),
            total_turns=item.current_turn_index,
        )
        for item in owned_sessions
    ]
|
||||||
|
|
||||||
|
|
||||||
async def create_voice_session_service(
|
async def create_voice_session_service(
|
||||||
request: VoiceSessionCreateRequest,
|
request: VoiceSessionCreateRequest,
|
||||||
user_id: str,
|
user_id: str,
|
||||||
@@ -373,7 +694,12 @@ async def get_voice_session_detail_service(
|
|||||||
).scalars().all()
|
).scalars().all()
|
||||||
events = list(reversed(events))
|
events = list(reversed(events))
|
||||||
|
|
||||||
summary = _session_to_summary(session)
|
latest_turn = turns[-1] if turns else None
|
||||||
|
summary = _session_to_summary(
|
||||||
|
session,
|
||||||
|
latest_turn=latest_turn,
|
||||||
|
total_turns=session.current_turn_index,
|
||||||
|
)
|
||||||
return VoiceSessionDetailResponse(
|
return VoiceSessionDetailResponse(
|
||||||
**summary.model_dump(),
|
**summary.model_dump(),
|
||||||
recent_turns=[_turn_to_summary(turn) for turn in turns],
|
recent_turns=[_turn_to_summary(turn) for turn in turns],
|
||||||
@@ -399,208 +725,83 @@ async def create_voice_turn_from_text_service(
|
|||||||
user_id: str,
|
user_id: str,
|
||||||
db: AsyncSession,
|
db: AsyncSession,
|
||||||
) -> VoiceTurnAcceptedResponse:
|
) -> VoiceTurnAcceptedResponse:
|
||||||
|
session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
|
||||||
|
transcript_text = request.transcript_text.strip()
|
||||||
|
session, turn = await _create_pending_turn(
|
||||||
|
db,
|
||||||
|
session=session,
|
||||||
|
transcript_text=transcript_text,
|
||||||
|
transcript_confidence=1.0,
|
||||||
|
transcription_provider="fallback",
|
||||||
|
user_audio_duration_ms=request.duration_ms,
|
||||||
|
)
|
||||||
|
status = await _process_pending_turn(
|
||||||
|
db,
|
||||||
|
session=session,
|
||||||
|
turn=turn,
|
||||||
|
transcript_text=transcript_text,
|
||||||
|
user_id=user_id,
|
||||||
|
)
|
||||||
|
return VoiceTurnAcceptedResponse(
|
||||||
|
turn_id=turn.id,
|
||||||
|
session_id=session.id,
|
||||||
|
status=status,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def create_voice_turn_from_upload_service(
|
||||||
|
*,
|
||||||
|
session_id: str,
|
||||||
|
user_id: str,
|
||||||
|
audio_bytes: bytes,
|
||||||
|
file_name: str,
|
||||||
|
mime_type: str | None,
|
||||||
|
duration_ms: int | None,
|
||||||
|
transcript_hint: str | None,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> VoiceTurnUploadAcceptedResponse:
|
||||||
session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
|
session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
|
||||||
if session.status not in CONTINUABLE_SESSION_STATUSES:
|
if session.status not in CONTINUABLE_SESSION_STATUSES:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=409,
|
status_code=409,
|
||||||
detail="Voice session is not ready for another turn.",
|
detail="Voice session is not ready for another turn.",
|
||||||
)
|
)
|
||||||
|
|
||||||
transcript_text = request.transcript_text.strip()
|
|
||||||
next_turn_index = session.current_turn_index + 1
|
next_turn_index = session.current_turn_index + 1
|
||||||
detected_intent, intent_confidence = _detect_intent(
|
user_audio_path = write_uploaded_user_audio(
|
||||||
transcript_text,
|
|
||||||
current_turn_index=session.current_turn_index,
|
|
||||||
)
|
|
||||||
|
|
||||||
turn = VoiceTurn(
|
|
||||||
session_id=session.id,
|
session_id=session.id,
|
||||||
turn_index=next_turn_index,
|
turn_index=next_turn_index,
|
||||||
status="transcribing",
|
file_name=file_name,
|
||||||
user_audio_duration_ms=request.duration_ms,
|
mime_type=mime_type,
|
||||||
user_transcript=transcript_text,
|
audio_data=audio_bytes,
|
||||||
transcript_confidence=1.0,
|
|
||||||
detected_intent=detected_intent,
|
|
||||||
intent_confidence=intent_confidence,
|
|
||||||
)
|
)
|
||||||
session.status = "processing_turn"
|
transcription = await transcribe_voice_audio(
|
||||||
session.current_turn_index = next_turn_index
|
audio_bytes=audio_bytes,
|
||||||
session.latest_user_transcript = transcript_text
|
file_name=file_name,
|
||||||
session.last_error = None
|
mime_type=mime_type,
|
||||||
db.add(turn)
|
transcript_hint=transcript_hint,
|
||||||
await db.commit()
|
)
|
||||||
await db.refresh(session)
|
session, turn = await _create_pending_turn(
|
||||||
await db.refresh(turn)
|
|
||||||
|
|
||||||
await _record_session_event(
|
|
||||||
db,
|
db,
|
||||||
session_id=session.id,
|
session=session,
|
||||||
turn_id=turn.id,
|
transcript_text=transcription.transcript_text,
|
||||||
event_type="turn_received",
|
transcript_confidence=transcription.confidence,
|
||||||
status="received",
|
transcription_provider=transcription.provider,
|
||||||
message="Voice turn fallback text received.",
|
user_audio_path=user_audio_path,
|
||||||
metadata={"turn_index": turn.turn_index},
|
user_audio_mime_type=mime_type,
|
||||||
|
user_audio_duration_ms=duration_ms,
|
||||||
)
|
)
|
||||||
await _record_session_event(
|
status = await _process_pending_turn(
|
||||||
db,
|
db,
|
||||||
session_id=session.id,
|
session=session,
|
||||||
turn_id=turn.id,
|
turn=turn,
|
||||||
event_type="turn_transcribed",
|
transcript_text=transcription.transcript_text,
|
||||||
status="succeeded",
|
user_id=user_id,
|
||||||
message="Fallback transcript accepted.",
|
|
||||||
metadata={"transcript_confidence": turn.transcript_confidence},
|
|
||||||
)
|
)
|
||||||
|
return VoiceTurnUploadAcceptedResponse(
|
||||||
assistant_text: str | None = None
|
|
||||||
assistant_result: StoryOutput | None = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
await _record_session_event(
|
|
||||||
db,
|
|
||||||
session_id=session.id,
|
|
||||||
turn_id=turn.id,
|
|
||||||
event_type="intent_resolved",
|
|
||||||
status="succeeded",
|
|
||||||
message="Turn intent resolved.",
|
|
||||||
metadata={
|
|
||||||
"detected_intent": detected_intent,
|
|
||||||
"intent_confidence": intent_confidence,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
if detected_intent == "save_story":
|
|
||||||
assistant_text = "好的,这个故事已经准备好保存到故事库了。"
|
|
||||||
elif detected_intent == "end_story":
|
|
||||||
assistant_text = "好的,我们先把故事停在这里。想保存的话,现在就可以保存到故事库。"
|
|
||||||
else:
|
|
||||||
assistant_result = await _generate_assistant_turn(
|
|
||||||
db,
|
|
||||||
session=session,
|
|
||||||
transcript_text=transcript_text,
|
|
||||||
intent=detected_intent,
|
|
||||||
)
|
|
||||||
assistant_text = assistant_result.story_text.strip()
|
|
||||||
|
|
||||||
merged_state, story_patch = _merge_story_state(
|
|
||||||
session,
|
|
||||||
transcript_text=transcript_text,
|
|
||||||
intent=detected_intent,
|
|
||||||
assistant_result=assistant_result,
|
|
||||||
)
|
|
||||||
turn.story_patch = story_patch
|
|
||||||
turn.assistant_text = assistant_text
|
|
||||||
turn.status = "narrative_ready"
|
|
||||||
session.story_state = merged_state
|
|
||||||
session.latest_assistant_text = assistant_text
|
|
||||||
session.status = "waiting_user"
|
|
||||||
if assistant_result and assistant_result.title and not session.working_title:
|
|
||||||
session.working_title = assistant_result.title
|
|
||||||
await db.commit()
|
|
||||||
await db.refresh(session)
|
|
||||||
await db.refresh(turn)
|
|
||||||
|
|
||||||
await _record_session_event(
|
|
||||||
db,
|
|
||||||
session_id=session.id,
|
|
||||||
turn_id=turn.id,
|
|
||||||
event_type="story_patch_applied",
|
|
||||||
status="succeeded",
|
|
||||||
message="Story state updated after one turn.",
|
|
||||||
metadata=story_patch,
|
|
||||||
)
|
|
||||||
await _record_session_event(
|
|
||||||
db,
|
|
||||||
session_id=session.id,
|
|
||||||
turn_id=turn.id,
|
|
||||||
event_type="assistant_text_ready",
|
|
||||||
status="succeeded",
|
|
||||||
message="Assistant text response generated.",
|
|
||||||
metadata={
|
|
||||||
"assistant_text_length": len(assistant_text or ""),
|
|
||||||
"working_title": session.working_title,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
turn.status = "failed"
|
|
||||||
turn.error_message = str(exc)
|
|
||||||
session.status = "waiting_user"
|
|
||||||
session.last_error = str(exc)
|
|
||||||
await db.commit()
|
|
||||||
await db.refresh(session)
|
|
||||||
await db.refresh(turn)
|
|
||||||
await _record_session_event(
|
|
||||||
db,
|
|
||||||
session_id=session.id,
|
|
||||||
turn_id=turn.id,
|
|
||||||
event_type="session_failed",
|
|
||||||
status="failed",
|
|
||||||
message="Assistant narrative generation failed for one voice turn.",
|
|
||||||
metadata={"error": str(exc), "turn_index": turn.turn_index},
|
|
||||||
)
|
|
||||||
logger.warning(
|
|
||||||
"voice_turn_generation_failed",
|
|
||||||
session_id=session.id,
|
|
||||||
turn_id=turn.id,
|
|
||||||
error=str(exc),
|
|
||||||
)
|
|
||||||
return VoiceTurnAcceptedResponse(
|
|
||||||
turn_id=turn.id,
|
|
||||||
session_id=session.id,
|
|
||||||
status=turn.status,
|
|
||||||
)
|
|
||||||
|
|
||||||
if assistant_text:
|
|
||||||
try:
|
|
||||||
audio_bytes = await text_to_speech(
|
|
||||||
assistant_text,
|
|
||||||
db=db,
|
|
||||||
user_id=user_id,
|
|
||||||
)
|
|
||||||
saved_path = write_session_audio(
|
|
||||||
build_turn_assistant_audio_path(session.id, turn.turn_index),
|
|
||||||
audio_bytes,
|
|
||||||
)
|
|
||||||
turn.assistant_audio_path = saved_path
|
|
||||||
turn.assistant_audio_duration_ms = None
|
|
||||||
turn.status = "audio_ready"
|
|
||||||
await db.commit()
|
|
||||||
await db.refresh(turn)
|
|
||||||
await _record_session_event(
|
|
||||||
db,
|
|
||||||
session_id=session.id,
|
|
||||||
turn_id=turn.id,
|
|
||||||
event_type="assistant_audio_ready",
|
|
||||||
status="succeeded",
|
|
||||||
message="Assistant audio response generated.",
|
|
||||||
metadata={"audio_path": saved_path},
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
turn.status = "narrative_ready"
|
|
||||||
turn.error_message = None
|
|
||||||
session.last_error = None
|
|
||||||
await db.commit()
|
|
||||||
await db.refresh(turn)
|
|
||||||
await db.refresh(session)
|
|
||||||
await _record_session_event(
|
|
||||||
db,
|
|
||||||
session_id=session.id,
|
|
||||||
turn_id=turn.id,
|
|
||||||
event_type="assistant_audio_failed",
|
|
||||||
status="failed",
|
|
||||||
message="Assistant audio generation failed, text response kept.",
|
|
||||||
metadata={"error": str(exc)},
|
|
||||||
)
|
|
||||||
logger.warning(
|
|
||||||
"voice_turn_audio_failed",
|
|
||||||
session_id=session.id,
|
|
||||||
turn_id=turn.id,
|
|
||||||
error=str(exc),
|
|
||||||
)
|
|
||||||
|
|
||||||
return VoiceTurnAcceptedResponse(
|
|
||||||
turn_id=turn.id,
|
turn_id=turn.id,
|
||||||
session_id=session.id,
|
session_id=session.id,
|
||||||
status=turn.status,
|
status=status,
|
||||||
|
transcription_provider=transcription.provider,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -636,6 +837,23 @@ async def get_voice_turn_audio_service(
|
|||||||
return read_session_audio(turn.assistant_audio_path)
|
return read_session_audio(turn.assistant_audio_path)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_voice_turn_user_audio_service(
    session_id: str,
    turn_id: str,
    user_id: str,
    db: AsyncSession,
) -> tuple[bytes, str]:
    """Return the stored user recording for one turn together with its MIME type.

    The turn is resolved through ``_get_owned_turn`` using the session, turn
    and user identifiers. When the turn's MIME type is empty the response
    falls back to ``audio/webm``.

    Raises:
        HTTPException: 404 when no stored audio exists at the turn's path.
    """
    owned_turn = await _get_owned_turn(
        db,
        session_id=session_id,
        turn_id=turn_id,
        user_id=user_id,
    )
    stored_path = owned_turn.user_audio_path
    if session_audio_exists(stored_path):
        payload = read_session_audio(stored_path)
        content_type = owned_turn.user_audio_mime_type or "audio/webm"
        return payload, content_type
    raise HTTPException(status_code=404, detail="Uploaded user audio not found")
|
||||||
|
|
||||||
|
|
||||||
async def finalize_voice_session_service(
|
async def finalize_voice_session_service(
|
||||||
session_id: str,
|
session_id: str,
|
||||||
request: VoiceSessionFinalizeRequest,
|
request: VoiceSessionFinalizeRequest,
|
||||||
@@ -655,6 +873,7 @@ async def finalize_voice_session_service(
|
|||||||
raise HTTPException(status_code=409, detail="Voice session is not ready to finalize.")
|
raise HTTPException(status_code=409, detail="Voice session is not ready to finalize.")
|
||||||
|
|
||||||
session.status = "finalizing_story"
|
session.status = "finalizing_story"
|
||||||
|
session.updated_at = _utcnow()
|
||||||
await db.commit()
|
await db.commit()
|
||||||
await db.refresh(session)
|
await db.refresh(session)
|
||||||
await _record_session_event(
|
await _record_session_event(
|
||||||
@@ -695,6 +914,7 @@ async def finalize_voice_session_service(
|
|||||||
session.final_story_id = story.id
|
session.final_story_id = story.id
|
||||||
session.status = "completed"
|
session.status = "completed"
|
||||||
session.last_error = None
|
session.last_error = None
|
||||||
|
session.updated_at = _utcnow()
|
||||||
await db.commit()
|
await db.commit()
|
||||||
await db.refresh(session)
|
await db.refresh(session)
|
||||||
|
|
||||||
@@ -728,6 +948,7 @@ async def abandon_voice_session_service(
|
|||||||
|
|
||||||
session.status = "abandoned"
|
session.status = "abandoned"
|
||||||
session.last_error = request.reason
|
session.last_error = request.reason
|
||||||
|
session.updated_at = _utcnow()
|
||||||
await db.commit()
|
await db.commit()
|
||||||
await db.refresh(session)
|
await db.refresh(session)
|
||||||
|
|
||||||
@@ -741,4 +962,9 @@ async def abandon_voice_session_service(
|
|||||||
metadata={"reason": request.reason},
|
metadata={"reason": request.reason},
|
||||||
)
|
)
|
||||||
await db.refresh(session)
|
await db.refresh(session)
|
||||||
return _session_to_summary(session)
|
latest_turn = await _get_latest_turn(db, session_id=session.id)
|
||||||
|
return _session_to_summary(
|
||||||
|
session,
|
||||||
|
latest_turn=latest_turn,
|
||||||
|
total_turns=session.current_turn_index,
|
||||||
|
)
|
||||||
|
|||||||
@@ -26,6 +26,38 @@ def build_turn_assistant_audio_path(session_id: str, turn_index: int) -> Path:
|
|||||||
return session_storage_dir(session_id) / f"turn-{turn_index:03d}-assistant.mp3"
|
return session_storage_dir(session_id) / f"turn-{turn_index:03d}-assistant.mp3"
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_audio_suffix(file_name: str | None, mime_type: str | None) -> str:
|
||||||
|
if file_name and "." in file_name:
|
||||||
|
return file_name.rsplit(".", 1)[-1].lower()
|
||||||
|
|
||||||
|
if mime_type == "audio/webm":
|
||||||
|
return "webm"
|
||||||
|
if mime_type == "audio/wav":
|
||||||
|
return "wav"
|
||||||
|
if mime_type == "audio/mpeg":
|
||||||
|
return "mp3"
|
||||||
|
if mime_type == "audio/mp4":
|
||||||
|
return "m4a"
|
||||||
|
return "bin"
|
||||||
|
|
||||||
|
|
||||||
|
def write_uploaded_user_audio(
    *,
    session_id: str,
    turn_index: int,
    file_name: str | None,
    mime_type: str | None,
    audio_data: bytes,
) -> str:
    """Store the audio uploaded for one user turn and return the saved path.

    The target path is built by ``build_turn_user_audio_path`` from the
    session id, the turn index and the suffix that
    ``_normalize_audio_suffix`` derives from the file name and MIME type.
    Writing is delegated to ``write_session_audio``.
    """
    target_path = build_turn_user_audio_path(
        session_id,
        turn_index,
        _normalize_audio_suffix(file_name, mime_type),
    )
    return write_session_audio(target_path, audio_data)
|
||||||
|
|
||||||
|
|
||||||
def write_session_audio(path: Path, audio_data: bytes) -> str:
|
def write_session_audio(path: Path, audio_data: bytes) -> str:
|
||||||
"""Persist session audio bytes atomically and return the saved path."""
|
"""Persist session audio bytes atomically and return the saved path."""
|
||||||
|
|
||||||
|
|||||||
134
backend/app/services/voice_transcription_service.py
Normal file
134
backend/app/services/voice_transcription_service.py
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
"""Voice transcription helpers for co-creation sessions."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
from fastapi import HTTPException
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
|
||||||
|
from app.core.config import settings
|
||||||
|
from app.core.logging import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class VoiceTranscriptionResult:
    """Immutable outcome of transcribing one uploaded voice turn."""

    # Transcript text produced for the turn.
    transcript_text: str
    # Confidence reported with the transcript; None when none is available.
    confidence: float | None = None
    # Label of the backend that produced the transcript.
    provider: str = "demo"
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_transcript(transcript_text: str) -> str:
|
||||||
|
return transcript_text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
async def _transcribe_demo(
    *,
    audio_bytes: bytes,
    mime_type: str | None,
    transcript_hint: str | None,
) -> VoiceTranscriptionResult:
    """Produce a transcript without calling any speech-to-text backend.

    The transcript is taken from ``transcript_hint`` when it is non-blank.
    Otherwise, for ``text/*`` uploads, the payload itself is decoded as UTF-8
    (undecodable bytes are dropped) and used as the transcript.

    Raises:
        HTTPException: 503 when neither source yields any text.
    """
    resolved_text = _normalize_transcript(transcript_hint or "")

    # Only fall back to the payload when the hint produced nothing.
    if not resolved_text and mime_type and mime_type.startswith("text/"):
        decoded = audio_bytes.decode("utf-8", errors="ignore")
        resolved_text = _normalize_transcript(decoded)

    if not resolved_text:
        raise HTTPException(
            status_code=503,
            detail=(
                "当前环境未配置真实语音转写,请先使用文本共创模式,"
                "或在开发模式下提供 transcript_hint。"
            ),
        )

    return VoiceTranscriptionResult(
        transcript_text=resolved_text,
        confidence=1.0,
        provider="demo",
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _transcribe_openai(
    *,
    audio_bytes: bytes,
    file_name: str,
    mime_type: str | None,
    transcript_hint: str | None,
) -> VoiceTranscriptionResult:
    """Transcribe one uploaded audio turn through the OpenAI transcription API.

    Args:
        audio_bytes: Raw bytes of the uploaded recording.
        file_name: Name attached to the in-memory file sent to the API.
        mime_type: Accepted for signature parity with the other transcribers;
            it is not forwarded to the API.
        transcript_hint: Optional text sent as the transcription prompt.

    Returns:
        A result with provider ``"openai"`` and no confidence value.

    Raises:
        HTTPException: 503 when no API key is configured or the API call
            fails; 502 when the API returns an empty transcript.
    """
    if not settings.openai_api_key:
        raise HTTPException(
            status_code=503,
            detail="OPENAI_API_KEY 未配置,无法使用 OpenAI 语音转写。",
        )

    audio_file = BytesIO(audio_bytes)
    audio_file.name = file_name

    request_kwargs: dict[str, object] = {
        "model": settings.voice_transcription_model,
        "file": audio_file,
        "language": settings.voice_transcription_language,
    }
    # Leave the prompt out entirely instead of passing None or an empty string.
    prompt = (transcript_hint or "").strip()
    if prompt:
        request_kwargs["prompt"] = prompt

    try:
        # The client is created per call, so close it deterministically
        # rather than leaving its HTTP connections to garbage collection.
        async with AsyncOpenAI(api_key=settings.openai_api_key) as client:
            response = await client.audio.transcriptions.create(**request_kwargs)
    except Exception as exc:
        logger.warning("voice_transcription_openai_failed", error=str(exc))
        raise HTTPException(
            status_code=503,
            detail="语音转写服务暂时不可用,请稍后重试。",
        ) from exc

    transcript_text = _normalize_transcript(getattr(response, "text", "") or "")
    if not transcript_text:
        raise HTTPException(status_code=502, detail="语音转写结果为空,请重试。")

    return VoiceTranscriptionResult(
        transcript_text=transcript_text,
        confidence=None,
        provider="openai",
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def transcribe_voice_audio(
    *,
    audio_bytes: bytes,
    file_name: str,
    mime_type: str | None,
    transcript_hint: str | None = None,
) -> VoiceTranscriptionResult:
    """Dispatch one uploaded audio turn to the configured transcription mode.

    ``settings.voice_transcription_mode`` is trimmed and lowercased, with an
    empty value treated as ``"demo"``. ``"openai"`` selects the OpenAI
    transcriber; every other value except ``"disabled"`` selects the demo
    transcriber.

    Raises:
        HTTPException: 503 when the mode is ``"disabled"``.
    """
    configured_mode = settings.voice_transcription_mode or "demo"
    active_mode = configured_mode.strip().lower()

    if active_mode == "openai":
        return await _transcribe_openai(
            audio_bytes=audio_bytes,
            file_name=file_name,
            mime_type=mime_type,
            transcript_hint=transcript_hint,
        )

    if active_mode != "disabled":
        return await _transcribe_demo(
            audio_bytes=audio_bytes,
            mime_type=mime_type,
            transcript_hint=transcript_hint,
        )

    raise HTTPException(
        status_code=503,
        detail="当前环境已禁用语音转写,请先使用文本共创模式。",
    )
|
||||||
@@ -22,6 +22,7 @@ dependencies = [
|
|||||||
"redis>=5.0.0",
|
"redis>=5.0.0",
|
||||||
"edge-tts>=6.1.0",
|
"edge-tts>=6.1.0",
|
||||||
"openai>=1.0.0",
|
"openai>=1.0.0",
|
||||||
|
"python-multipart>=0.0.9",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|||||||
@@ -199,3 +199,147 @@ async def test_voice_session_abandon_blocks_future_turns(
|
|||||||
assert response.status_code == 409
|
assert response.status_code == 409
|
||||||
finally:
|
finally:
|
||||||
app.dependency_overrides.clear()
|
app.dependency_overrides.clear()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_voice_session_uploaded_audio_turn_uses_demo_transcript_hint(
    db_session,
    auth_token,
):
    """Upload one audio turn with a transcript hint and read the audio back."""

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db

    uploaded_audio = b"fake-webm-audio"
    generated_story = StoryOutput(
        mode="generated",
        title="小鲸鱼找朋友",
        story_text="小鲸鱼在海面上遇见了一只会唱歌的海鸥。",
        cover_prompt_suggestion="温暖儿童绘本封面,小鲸鱼和海鸥",
    )

    with (
        patch(
            "app.services.voice_session_service.generate_story_content",
            new_callable=AsyncMock,
        ) as story_mock,
        patch(
            "app.services.voice_session_service.text_to_speech",
            new_callable=AsyncMock,
        ) as tts_mock,
    ):
        story_mock.return_value = generated_story
        tts_mock.return_value = b"fake-upload-audio"

        asgi_transport = ASGITransport(app=app)
        try:
            async with AsyncClient(
                transport=asgi_transport,
                base_url="http://test",
            ) as client:
                client.cookies.set("access_token", auth_token)

                # Create the session that will receive the uploaded turn.
                created = await client.post("/api/voice-sessions", json={})
                assert created.status_code == 201
                session_id = created.json()["id"]

                # Upload the audio together with a hint for the demo transcriber.
                accepted = await client.post(
                    f"/api/voice-sessions/{session_id}/turns",
                    files={
                        "audio_file": ("turn.webm", uploaded_audio, "audio/webm"),
                    },
                    data={
                        "duration_ms": "3200",
                        "transcript_hint": "我想听一个小鲸鱼找朋友的故事",
                    },
                )
                assert accepted.status_code == 202
                accepted_body = accepted.json()
                assert accepted_body["status"] == "audio_ready"
                assert accepted_body["transcription_provider"] == "demo"

                # The turn detail must expose both audio artefacts.
                turn_id = accepted_body["turn_id"]
                turn_response = await client.get(
                    f"/api/voice-sessions/{session_id}/turns/{turn_id}"
                )
                assert turn_response.status_code == 200
                detail = turn_response.json()
                assert detail["user_audio_ready"] is True
                assert detail["user_audio_url"].endswith("/user-audio")
                assert detail["transcription_provider"] == "demo"
                assert detail["assistant_audio_ready"] is True

                # The stored user audio is served back byte-for-byte.
                audio_response = await client.get(detail["user_audio_url"])
                assert audio_response.status_code == 200
                assert audio_response.content == uploaded_audio
                assert audio_response.headers["content-type"] == "audio/webm"
        finally:
            app.dependency_overrides.clear()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_voice_session_list_orders_recent_sessions_first(
    db_session,
    auth_token,
):
    """Create two sessions with one turn each and check the listing order."""

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db

    generated_stories = [
        StoryOutput(
            mode="generated",
            title="第一场冒险",
            story_text="第一段故事。",
            cover_prompt_suggestion="封面一",
        ),
        StoryOutput(
            mode="generated",
            title="第二场冒险",
            story_text="第二段故事。",
            cover_prompt_suggestion="封面二",
        ),
    ]

    with (
        patch(
            "app.services.voice_session_service.generate_story_content",
            new_callable=AsyncMock,
        ) as story_mock,
        patch(
            "app.services.voice_session_service.text_to_speech",
            new_callable=AsyncMock,
        ) as tts_mock,
    ):
        story_mock.side_effect = generated_stories
        tts_mock.side_effect = [b"audio-1", b"audio-2"]

        asgi_transport = ASGITransport(app=app)
        try:
            async with AsyncClient(
                transport=asgi_transport,
                base_url="http://test",
            ) as client:
                client.cookies.set("access_token", auth_token)

                # Same request sequence for both sessions: create, then one
                # fallback text turn.
                session_ids = []
                for transcript in ("第一个故事", "第二个故事"):
                    created = await client.post("/api/voice-sessions", json={})
                    new_session_id = created.json()["id"]
                    session_ids.append(new_session_id)
                    await client.post(
                        f"/api/voice-sessions/{new_session_id}/turns/fallback",
                        json={"transcript_text": transcript},
                    )
                first_session_id, second_session_id = session_ids

                listing = await client.get("/api/voice-sessions?limit=8")
                assert listing.status_code == 200
                sessions = listing.json()
                assert len(sessions) >= 2
                assert sessions[0]["id"] == second_session_id
                assert sessions[1]["id"] == first_session_id
                assert sessions[0]["total_turns"] == 1
                assert sessions[0]["last_turn_status"] == "audio_ready"

                active_listing = await client.get("/api/voice-sessions?active_only=true")
                assert active_listing.status_code == 200
                active_sessions = active_listing.json()
                assert {item["id"] for item in active_sessions} >= {
                    first_session_id,
                    second_session_id,
                }
        finally:
            app.dependency_overrides.clear()
|
||||||
|
|||||||
@@ -2,13 +2,16 @@ const BASE_URL = ''
|
|||||||
|
|
||||||
class ApiClient {
|
class ApiClient {
|
||||||
async request<T>(url: string, options: RequestInit = {}): Promise<T> {
|
async request<T>(url: string, options: RequestInit = {}): Promise<T> {
|
||||||
|
const headers = new Headers(options.headers || {})
|
||||||
|
const isFormData = options.body instanceof FormData
|
||||||
|
if (!isFormData && !headers.has('Content-Type')) {
|
||||||
|
headers.set('Content-Type', 'application/json')
|
||||||
|
}
|
||||||
|
|
||||||
const response = await fetch(`${BASE_URL}${url}`, {
|
const response = await fetch(`${BASE_URL}${url}`, {
|
||||||
...options,
|
...options,
|
||||||
credentials: 'include',
|
credentials: 'include',
|
||||||
headers: {
|
headers,
|
||||||
'Content-Type': 'application/json',
|
|
||||||
...options.headers,
|
|
||||||
},
|
|
||||||
})
|
})
|
||||||
|
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
@@ -30,6 +33,13 @@ class ApiClient {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** POST `data` to `url` as the request body, delegating to `request`. */
postForm<T>(url: string, data: FormData): Promise<T> {
  const init: RequestInit = { method: 'POST', body: data }
  return this.request<T>(url, init)
}
|
||||||
|
|
||||||
put<T>(url: string, data?: unknown): Promise<T> {
|
put<T>(url: string, data?: unknown): Promise<T> {
|
||||||
return this.request<T>(url, {
|
return this.request<T>(url, {
|
||||||
method: 'PUT',
|
method: 'PUT',
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import {
|
|||||||
ArrowRightOnRectangleIcon,
|
ArrowRightOnRectangleIcon,
|
||||||
BookOpenIcon,
|
BookOpenIcon,
|
||||||
GlobeAltIcon,
|
GlobeAltIcon,
|
||||||
|
MicrophoneIcon,
|
||||||
MoonIcon,
|
MoonIcon,
|
||||||
SparklesIcon,
|
SparklesIcon,
|
||||||
StarIcon,
|
StarIcon,
|
||||||
@@ -101,6 +102,13 @@ onMounted(() => {
|
|||||||
<UserGroupIcon class="h-5 w-5" />
|
<UserGroupIcon class="h-5 w-5" />
|
||||||
<span>{{ t('app.navProfiles') }}</span>
|
<span>{{ t('app.navProfiles') }}</span>
|
||||||
</router-link>
|
</router-link>
|
||||||
|
<router-link
|
||||||
|
to="/voice-studio"
|
||||||
|
class="hidden sm:flex items-center space-x-2 px-4 py-2 rounded-xl text-gray-600 hover:text-purple-600 hover:bg-purple-50 transition-all duration-300"
|
||||||
|
>
|
||||||
|
<MicrophoneIcon class="h-5 w-5" />
|
||||||
|
<span>语音共创</span>
|
||||||
|
</router-link>
|
||||||
<router-link
|
<router-link
|
||||||
to="/universes"
|
to="/universes"
|
||||||
class="hidden sm:flex items-center space-x-2 px-4 py-2 rounded-xl text-gray-600 hover:text-purple-600 hover:bg-purple-50 transition-all duration-300"
|
class="hidden sm:flex items-center space-x-2 px-4 py-2 rounded-xl text-gray-600 hover:text-purple-600 hover:bg-purple-50 transition-all duration-300"
|
||||||
@@ -169,6 +177,14 @@ onMounted(() => {
|
|||||||
<UserGroupIcon class="h-5 w-5" />
|
<UserGroupIcon class="h-5 w-5" />
|
||||||
<span>{{ t('app.navProfiles') }}</span>
|
<span>{{ t('app.navProfiles') }}</span>
|
||||||
</router-link>
|
</router-link>
|
||||||
|
<router-link
|
||||||
|
to="/voice-studio"
|
||||||
|
class="sm:hidden flex items-center space-x-3 px-4 py-3 text-gray-700 hover:bg-purple-50 transition-colors"
|
||||||
|
@click="showUserMenu = false"
|
||||||
|
>
|
||||||
|
<MicrophoneIcon class="h-5 w-5" />
|
||||||
|
<span>语音共创</span>
|
||||||
|
</router-link>
|
||||||
<router-link
|
<router-link
|
||||||
to="/universes"
|
to="/universes"
|
||||||
class="sm:hidden flex items-center space-x-3 px-4 py-3 text-gray-700 hover:bg-purple-50 transition-colors"
|
class="sm:hidden flex items-center space-x-3 px-4 py-3 text-gray-700 hover:bg-purple-50 transition-colors"
|
||||||
|
|||||||
@@ -13,6 +13,11 @@ const router = createRouter({
|
|||||||
name: 'my-stories',
|
name: 'my-stories',
|
||||||
component: () => import('./views/MyStories.vue'),
|
component: () => import('./views/MyStories.vue'),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
path: '/voice-studio',
|
||||||
|
name: 'voice-studio',
|
||||||
|
component: () => import('./views/VoiceStudio.vue'),
|
||||||
|
},
|
||||||
{
|
{
|
||||||
path: '/profiles',
|
path: '/profiles',
|
||||||
name: 'profiles',
|
name: 'profiles',
|
||||||
|
|||||||
75
frontend/src/types/voiceSession.ts
Normal file
75
frontend/src/types/voiceSession.ts
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
/** Summary of one turn inside a voice co-creation session. */
export interface VoiceTurnSummary {
  id: string
  session_id: string
  turn_index: number
  status: string

  // What the user said and how it was transcribed.
  user_transcript: string | null
  transcript_confidence: number | null
  transcription_provider: string | null

  // Intent resolved for the turn.
  detected_intent: string
  intent_confidence: number | null

  // Assistant reply and its audio, when available.
  assistant_text: string | null
  assistant_audio_ready: boolean
  assistant_audio_url: string | null

  // Uploaded user recording, when available.
  user_audio_ready: boolean
  user_audio_url: string | null

  error_message: string | null
  created_at: string
  updated_at: string
}
|
||||||
|
|
||||||
|
/** One recorded event of a voice session, optionally tied to a turn. */
export interface VoiceSessionEvent {
  id: number
  session_id: string
  // Null when the event is not associated with a specific turn.
  turn_id: string | null
  event_type: string
  status: string
  message: string | null
  // Free-form payload attached to the event.
  event_metadata: Record<string, unknown>
  created_at: string
}
|
||||||
|
|
||||||
|
/** Summary of a voice co-creation session, including its latest-turn state. */
export interface VoiceSessionSummary {
  id: string
  child_profile_id: string | null
  universe_id: string | null
  final_story_id: number | null
  target_mode: string
  status: string

  // Turn counters.
  current_turn_index: number
  total_turns: number

  // Story being built.
  working_title: string | null
  story_state: Record<string, unknown>

  // Snapshot of the most recent turn.
  latest_user_transcript: string | null
  latest_assistant_text: string | null
  latest_detected_intent: string | null
  latest_assistant_audio_ready: boolean
  last_turn_status: string | null

  // Flags the client can use to enable the continue and finalize actions.
  can_continue: boolean
  can_finalize: boolean

  last_error: string | null
  created_at: string
  updated_at: string
}
|
||||||
|
|
||||||
|
/** Session summary extended with its recent turns and recorded events. */
export interface VoiceSessionDetail extends VoiceSessionSummary {
  recent_turns: Array<VoiceTurnSummary>
  events: Array<VoiceSessionEvent>
}
|
||||||
|
|
||||||
|
/** Response returned after a turn has been accepted for a session. */
export interface VoiceTurnAcceptedResponse {
  // Identifier of the turn that was created.
  turn_id: string
  // Session the turn belongs to.
  session_id: string
  // Status of the turn at the time the response was produced.
  status: string
}
|
||||||
|
|
||||||
|
/** Accepted-turn response for audio uploads; adds the transcription provider. */
export interface VoiceTurnUploadAcceptedResponse extends VoiceTurnAcceptedResponse {
  // Label of the backend that transcribed the upload, if any.
  transcription_provider: string | null
}
|
||||||
|
|
||||||
|
/** Response returned when a voice session is finalized. */
export interface VoiceSessionFinalizeResponse {
  session_id: string
  status: string
  // Identifier of the resulting story; null when none is reported.
  story_id: number | null
  // Identifier of a generation job; null when none is reported.
  generation_job_id: string | null
}
|
||||||
@@ -7,7 +7,8 @@ import BaseButton from '../components/ui/BaseButton.vue'
|
|||||||
import LoginDialog from '../components/ui/LoginDialog.vue'
|
import LoginDialog from '../components/ui/LoginDialog.vue'
|
||||||
import {
|
import {
|
||||||
SparklesIcon,
|
SparklesIcon,
|
||||||
ArrowRightOnRectangleIcon
|
ArrowRightOnRectangleIcon,
|
||||||
|
MicrophoneIcon,
|
||||||
} from '@heroicons/vue/24/outline'
|
} from '@heroicons/vue/24/outline'
|
||||||
|
|
||||||
const { locale } = useI18n()
|
const { locale } = useI18n()
|
||||||
@@ -36,6 +37,14 @@ function openCreateModal() {
|
|||||||
router.push({ path: '/my-stories', query: { openCreate: 'true' } })
|
router.push({ path: '/my-stories', query: { openCreate: 'true' } })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Navigate to the voice studio for signed-in users; otherwise open the login dialog.
function openVoiceStudio() {
  if (userStore.user) {
    router.push('/voice-studio')
    return
  }
  showLoginDialog.value = true
}
|
||||||
|
|
||||||
function scrollToFeatures() {
|
function scrollToFeatures() {
|
||||||
document.getElementById('features')?.scrollIntoView({ behavior: 'smooth' })
|
document.getElementById('features')?.scrollIntoView({ behavior: 'smooth' })
|
||||||
}
|
}
|
||||||
@@ -139,6 +148,10 @@ function scrollToFeatures() {
|
|||||||
<SparklesIcon class="h-5 w-5 mr-2" />
|
<SparklesIcon class="h-5 w-5 mr-2" />
|
||||||
开始创作故事
|
开始创作故事
|
||||||
</BaseButton>
|
</BaseButton>
|
||||||
|
<BaseButton size="lg" variant="secondary" @click="openVoiceStudio">
|
||||||
|
<MicrophoneIcon class="h-5 w-5 mr-2" />
|
||||||
|
进入语音共创
|
||||||
|
</BaseButton>
|
||||||
<button @click="scrollToFeatures" class="px-6 py-3 rounded-xl font-semibold text-stone-600 bg-white border border-stone-200 hover:border-amber-400 hover:text-amber-700 transition-all shadow-sm">
|
<button @click="scrollToFeatures" class="px-6 py-3 rounded-xl font-semibold text-stone-600 bg-white border border-stone-200 hover:border-amber-400 hover:text-amber-700 transition-all shadow-sm">
|
||||||
了解更多功能
|
了解更多功能
|
||||||
</button>
|
</button>
|
||||||
|
|||||||
771
frontend/src/views/VoiceStudio.vue
Normal file
771
frontend/src/views/VoiceStudio.vue
Normal file
@@ -0,0 +1,771 @@
|
|||||||
|
<script setup lang="ts">
|
||||||
|
import { computed, onBeforeUnmount, onMounted, ref, watch } from 'vue'
|
||||||
|
import { useRouter } from 'vue-router'
|
||||||
|
import { api } from '../api/client'
|
||||||
|
import { useUserStore } from '../stores/user'
|
||||||
|
import type {
|
||||||
|
VoiceSessionDetail,
|
||||||
|
VoiceSessionFinalizeResponse,
|
||||||
|
VoiceSessionSummary,
|
||||||
|
VoiceTurnAcceptedResponse,
|
||||||
|
VoiceTurnSummary,
|
||||||
|
VoiceTurnUploadAcceptedResponse,
|
||||||
|
} from '../types/voiceSession'
|
||||||
|
import BaseButton from '../components/ui/BaseButton.vue'
|
||||||
|
import BaseCard from '../components/ui/BaseCard.vue'
|
||||||
|
import BaseSelect from '../components/ui/BaseSelect.vue'
|
||||||
|
import BaseTextarea from '../components/ui/BaseTextarea.vue'
|
||||||
|
import LoadingSpinner from '../components/ui/LoadingSpinner.vue'
|
||||||
|
import EmptyState from '../components/ui/EmptyState.vue'
|
||||||
|
import {
|
||||||
|
ArrowPathIcon,
|
||||||
|
BookOpenIcon,
|
||||||
|
ExclamationCircleIcon,
|
||||||
|
MicrophoneIcon,
|
||||||
|
PaperAirplaneIcon,
|
||||||
|
SparklesIcon,
|
||||||
|
StopIcon,
|
||||||
|
XMarkIcon,
|
||||||
|
} from '@heroicons/vue/24/outline'
|
||||||
|
|
||||||
|
/** Minimal child-profile shape used to populate the profile selector. */
interface ChildProfile {
  /** Profile identifier; used as the select option value. */
  id: string
  /** Display name; used as the select option label. */
  name: string
}
|
||||||
|
|
||||||
|
/** Minimal story-universe shape used to populate the universe selector. */
interface StoryUniverse {
  /** Universe identifier; used as the select option value. */
  id: string
  /** Display name; used as the select option label. */
  name: string
}
|
||||||
|
|
||||||
|
// Router and the store holding the signed-in user.
const router = useRouter()
const userStore = useUserStore()

// Data loaded from the API.
const sessions = ref<VoiceSessionSummary[]>([])
const activeSession = ref<VoiceSessionDetail | null>(null)
const profiles = ref<ChildProfile[]>([])
const universes = ref<StoryUniverse[]>([])

// Form inputs bound in the template.
const selectedProfileId = ref('')
const selectedUniverseId = ref('')
const textTurnInput = ref('')
const uploadTranscriptHint = ref('')

// Busy flags, one per asynchronous action.
const loadingSessions = ref(false)
const creatingSession = ref(false)
const loadingSessionDetail = ref(false)
const sendingTurn = ref(false)
const finalizing = ref(false)
const abandoning = ref(false)

// Recording state shown in the UI.
const recording = ref(false)
const recordingDurationMs = ref(0)

// Page-level error and microphone-specific error messages.
const error = ref('')
const mediaError = ref('')

// True only in environments that expose the MediaRecorder API.
const recorderSupported = computed(() => {
  return typeof window !== 'undefined' && 'MediaRecorder' in window
})

// Non-reactive recorder plumbing; never read by the template.
let mediaRecorder: MediaRecorder | null = null
let mediaStream: MediaStream | null = null
let recordingChunks: Blob[] = []
let recordingTimer: number | null = null
let recordingStartedAt = 0

// The finished recording and the object URL used to preview it.
const recordedBlob = ref<Blob | null>(null)
const recordedAudioUrl = ref<string | null>(null)

// Select options derived from the loaded profiles and universes.
const profileOptions = computed(() => {
  return profiles.value.map((profile) => ({ value: profile.id, label: profile.name }))
})
const universeOptions = computed(() => {
  return universes.value.map((universe) => ({ value: universe.id, label: universe.name }))
})

// Turns of the active session, or an empty list when nothing is selected.
const activeTurnList = computed(() => {
  const session = activeSession.value
  return session?.recent_turns ?? []
})
|
||||||
|
|
||||||
|
// Display labels for the known session status codes.
const SESSION_STATUS_LABELS = new Map<string, string>([
  ['draft', '待开始'],
  ['processing_turn', '处理中'],
  ['waiting_user', '等待下一轮'],
  ['finalizing_story', '保存中'],
  ['completed', '已完成'],
  ['abandoned', '已放弃'],
  ['failed', '失败'],
])

/**
 * Translate a session status code into its display label.
 * Codes without a label are returned unchanged.
 */
function formatSessionStatus(status: string) {
  const label = SESSION_STATUS_LABELS.get(status)
  return label === undefined ? status : label
}
|
||||||
|
|
||||||
|
// Display labels for the known turn status codes.
const TURN_STATUS_LABELS = new Map<string, string>([
  ['received', '已接收'],
  ['transcribing', '转写中'],
  ['narrative_ready', '文本已生成'],
  ['audio_ready', '语音已生成'],
  ['failed', '失败'],
])

/**
 * Translate a turn status code into its display label.
 * Codes without a label are returned unchanged.
 */
function formatTurnStatus(status: string) {
  const label = TURN_STATUS_LABELS.get(status)
  return label === undefined ? status : label
}
|
||||||
|
|
||||||
|
// Display labels for the known intent codes.
const INTENT_LABELS = new Map<string, string>([
  ['start_story', '开启故事'],
  ['continue_story', '继续讲述'],
  ['correct_story', '修正走向'],
  ['end_story', '结束本轮'],
  ['save_story', '请求保存'],
])

/**
 * Translate a detected-intent code into its display label.
 * A missing or empty intent yields '未知'; an unrecognised non-empty code is
 * returned unchanged.
 */
function formatIntent(intent: string | null | undefined) {
  if (!intent) {
    return '未知'
  }
  const label = INTENT_LABELS.get(intent)
  return label === undefined ? intent : label
}
|
||||||
|
|
||||||
|
/**
 * Render a date string as a short zh-CN timestamp containing month, day,
 * hour and minute.
 */
function formatDate(dateStr: string) {
  const parsed = new Date(dateStr)
  const shortStamp: Intl.DateTimeFormatOptions = {
    month: 'short',
    day: 'numeric',
    hour: '2-digit',
    minute: '2-digit',
  }
  return parsed.toLocaleString('zh-CN', shortStamp)
}
|
||||||
|
|
||||||
|
/**
 * Release the object URL backing the recording preview, if there is one,
 * and clear the stored reference.
 */
function revokeRecordedAudioUrl() {
  const previewUrl = recordedAudioUrl.value
  if (!previewUrl) {
    return
  }
  URL.revokeObjectURL(previewUrl)
  recordedAudioUrl.value = null
}
|
||||||
|
|
||||||
|
/**
 * Discard the current recording: release its preview URL, drop the blob and
 * reset the displayed duration to zero.
 */
function clearRecordedAudio() {
  revokeRecordedAudioUrl()
  // Both resets are independent of the URL release above.
  recordingDurationMs.value = 0
  recordedBlob.value = null
}
|
||||||
|
|
||||||
|
/**
 * Load the child profiles of the signed-in user and preselect the first one
 * when nothing is selected yet. Does nothing when no user is signed in.
 *
 * Failures are reported through `error` instead of rejecting, so a failed
 * profile request does not stop the caller from loading the rest of the page.
 */
async function fetchProfiles() {
  if (!userStore.user) return
  try {
    const data = await api.get<{ profiles: ChildProfile[] }>('/api/profiles')
    // Tolerate a response without the list rather than storing undefined.
    profiles.value = data.profiles ?? []
    if (!selectedProfileId.value && profiles.value.length > 0) {
      selectedProfileId.value = profiles.value[0].id
    }
  } catch (err) {
    error.value = err instanceof Error ? err.message : '孩子档案加载失败'
  }
}
|
||||||
|
|
||||||
|
// Sequence number of the most recent universe request; older responses are ignored.
let universeRequestSeq = 0

/**
 * Load the story universes of a profile and preselect the first one.
 *
 * The universe selection is always cleared first. An empty `profileId`
 * empties the list without a request. Only the most recent call may update
 * state, so a slow response for a previously selected profile cannot
 * overwrite the current one. Failures are reported through `error` instead
 * of rejecting, because the caller invokes this without awaiting it.
 *
 * @param profileId Identifier of the profile whose universes are loaded.
 */
async function fetchUniverses(profileId: string) {
  const requestSeq = ++universeRequestSeq
  selectedUniverseId.value = ''
  if (!profileId) {
    universes.value = []
    return
  }
  try {
    const data = await api.get<{ universes: StoryUniverse[] }>(`/api/profiles/${profileId}/universes`)
    // A newer request started while this one was in flight: drop the result.
    if (requestSeq !== universeRequestSeq) return
    universes.value = data.universes
    if (universes.value.length > 0) {
      selectedUniverseId.value = universes.value[0].id
    }
  } catch (err) {
    if (requestSeq !== universeRequestSeq) return
    universes.value = []
    error.value = err instanceof Error ? err.message : '故事宇宙加载失败'
  }
}
|
||||||
|
|
||||||
|
/**
 * Refresh the list of recent sessions (at most 8). When no session is
 * active, the first session that can be continued is opened automatically.
 * Does nothing when no user is signed in; failures are reported through
 * `error`.
 */
async function loadSessions() {
  if (!userStore.user) return

  loadingSessions.value = true
  try {
    sessions.value = await api.get<VoiceSessionSummary[]>('/api/voice-sessions?limit=8')

    // Auto-resume only when nothing is open yet.
    const resumable = activeSession.value
      ? undefined
      : sessions.value.find((item) => item.can_continue)
    if (resumable) {
      await loadSessionDetail(resumable.id)
    }
  } catch (err) {
    error.value = err instanceof Error ? err.message : '会话列表加载失败'
  } finally {
    loadingSessions.value = false
  }
}
|
||||||
|
|
||||||
|
// Sequence number of the most recent detail request; older responses are ignored.
let sessionDetailRequestSeq = 0

/**
 * Load one session's detail and make it the active session.
 *
 * Clears the page error before the request. Only the most recent call may
 * update state, so when several sessions are clicked in quick succession a
 * slow earlier response cannot replace the session selected last. Failures
 * are reported through `error`.
 *
 * @param sessionId Identifier of the session to open.
 */
async function loadSessionDetail(sessionId: string) {
  const requestSeq = ++sessionDetailRequestSeq
  const isLatest = () => requestSeq === sessionDetailRequestSeq

  loadingSessionDetail.value = true
  error.value = ''
  try {
    const detail = await api.get<VoiceSessionDetail>(`/api/voice-sessions/${sessionId}`)
    if (isLatest()) {
      activeSession.value = detail
    }
  } catch (err) {
    if (isLatest()) {
      error.value = err instanceof Error ? err.message : '会话详情加载失败'
    }
  } finally {
    // A superseded request must not hide the spinner of the newer one.
    if (isLatest()) {
      loadingSessionDetail.value = false
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Create a new voice session from the selected profile and universe, then
 * refresh the list, open the new session and reset the turn inputs.
 *
 * Does nothing when no user is signed in, matching the other loaders; the
 * header button that triggers this is rendered for logged-out visitors too.
 * Failures are reported through `error`.
 */
async function createSession() {
  if (!userStore.user) return

  creatingSession.value = true
  error.value = ''
  try {
    const session = await api.post<VoiceSessionSummary>('/api/voice-sessions', {
      // Empty selections are sent as null.
      child_profile_id: selectedProfileId.value || null,
      universe_id: selectedUniverseId.value || null,
      target_mode: 'story',
    })
    await loadSessions()
    await loadSessionDetail(session.id)
    textTurnInput.value = ''
    uploadTranscriptHint.value = ''
    clearRecordedAudio()
  } catch (err) {
    error.value = err instanceof Error ? err.message : '创建语音共创会话失败'
  } finally {
    creatingSession.value = false
  }
}
|
||||||
|
|
||||||
|
// Turn statuses after which polling stops.
const TERMINAL_TURN_STATUSES = new Set(['audio_ready', 'narrative_ready', 'failed'])

/**
 * Poll a turn until it reaches a terminal status.
 *
 * @param sessionId Identifier of the session owning the turn.
 * @param turnId Identifier of the turn to poll.
 * @param maxAttempts Maximum number of requests before giving up (default 20).
 * @param intervalMs Delay between two requests in milliseconds (default 600).
 * @returns The turn once its status is terminal.
 * @throws Error when no terminal status is seen within `maxAttempts`
 *   requests; request failures propagate unchanged.
 */
async function pollTurnResult(
  sessionId: string,
  turnId: string,
  maxAttempts = 20,
  intervalMs = 600,
) {
  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    const turn = await api.get<VoiceTurnSummary>(`/api/voice-sessions/${sessionId}/turns/${turnId}`)
    if (TERMINAL_TURN_STATUSES.has(turn.status)) {
      return turn
    }
    // Waiting after the final attempt would only delay the timeout error.
    if (attempt < maxAttempts - 1) {
      await new Promise((resolve) => window.setTimeout(resolve, intervalMs))
    }
  }
  throw new Error('本轮语音共创处理超时,请刷新后查看最新状态')
}
|
||||||
|
|
||||||
|
/**
 * Wait for a submitted turn to finish, then refresh the session detail and
 * the session list.
 *
 * The refresh also runs when polling times out or fails, so the page shows
 * the latest known state next to the error. The polling error still
 * propagates to the caller afterwards.
 *
 * @param sessionId Identifier of the session owning the turn.
 * @param turnId Identifier of the turn that was submitted.
 */
async function refreshAfterTurn(sessionId: string, turnId: string) {
  try {
    await pollTurnResult(sessionId, turnId)
  } finally {
    await loadSessionDetail(sessionId)
    await loadSessions()
  }
}
|
||||||
|
|
||||||
|
/**
 * Submit the typed text as a fallback turn of the active session, clear the
 * input once the turn is accepted and refresh the session afterwards.
 * Does nothing without an active session or with blank input; failures are
 * reported through `error`.
 */
async function submitTextTurn() {
  const session = activeSession.value
  const transcript = textTurnInput.value.trim()
  if (!session || !transcript) return

  const endpoint = `/api/voice-sessions/${session.id}/turns/fallback`
  sendingTurn.value = true
  error.value = ''
  try {
    const accepted = await api.post<VoiceTurnAcceptedResponse>(endpoint, {
      transcript_text: transcript,
    })
    textTurnInput.value = ''
    await refreshAfterTurn(accepted.session_id, accepted.turn_id)
  } catch (err) {
    error.value = err instanceof Error ? err.message : '文本共创回合提交失败'
  } finally {
    sendingTurn.value = false
  }
}
|
||||||
|
|
||||||
|
// File extensions for the audio containers a recorder may report.
const RECORDING_EXTENSIONS = new Map<string, string>([
  ['audio/webm', 'webm'],
  ['video/webm', 'webm'],
  ['audio/ogg', 'ogg'],
  ['audio/mp4', 'mp4'],
  ['audio/mpeg', 'mp3'],
  ['audio/wav', 'wav'],
  ['audio/x-wav', 'wav'],
])

// Build an upload filename whose extension matches the blob's MIME type.
function recordedFileNameFor(blob: Blob) {
  // Drop parameters such as ";codecs=opus" before the lookup.
  const baseType = blob.type.split(';')[0].trim().toLowerCase()
  const extension = RECORDING_EXTENSIONS.get(baseType) ?? 'webm'
  return `voice-turn.${extension}`
}

/**
 * Upload the current recording as a turn of the active session, clear the
 * recording and the transcript hint once accepted, then refresh the session.
 *
 * The filename extension follows the recorded blob's MIME type instead of
 * always being `.webm`; unknown or empty types keep the `.webm` name.
 * `duration_ms` is sent only when positive and `transcript_hint` only when
 * non-blank. Does nothing without an active session or a recording; failures
 * are reported through `error`.
 */
async function submitRecordedTurn() {
  const session = activeSession.value
  const blob = recordedBlob.value
  if (!session || !blob) return

  sendingTurn.value = true
  error.value = ''
  try {
    const formData = new FormData()
    formData.append('audio_file', blob, recordedFileNameFor(blob))
    if (recordingDurationMs.value > 0) {
      formData.append('duration_ms', String(recordingDurationMs.value))
    }
    const hint = uploadTranscriptHint.value.trim()
    if (hint) {
      formData.append('transcript_hint', hint)
    }
    const result = await api.postForm<VoiceTurnUploadAcceptedResponse>(
      `/api/voice-sessions/${session.id}/turns`,
      formData,
    )
    clearRecordedAudio()
    uploadTranscriptHint.value = ''
    await refreshAfterTurn(result.session_id, result.turn_id)
  } catch (err) {
    error.value = err instanceof Error ? err.message : '录音回合提交失败'
  } finally {
    sendingTurn.value = false
  }
}
|
||||||
|
|
||||||
|
/**
 * Finalize the active session as a saved story, refresh the list and the
 * session, then navigate to the story when the response carries a story id.
 *
 * The session id is captured before the first await, so the session that
 * was finalized is the one reloaded even if another session is opened
 * while the requests are in flight. Does nothing without an active session;
 * failures are reported through `error`.
 */
async function finalizeSession() {
  const session = activeSession.value
  if (!session) return

  const sessionId = session.id
  finalizing.value = true
  error.value = ''
  try {
    const result = await api.post<VoiceSessionFinalizeResponse>(
      `/api/voice-sessions/${sessionId}/finalize`,
      {
        save_story: true,
        generate_cover: true,
        generate_final_audio: false,
      },
    )
    await loadSessions()
    await loadSessionDetail(sessionId)
    if (result.story_id) {
      router.push(`/story/${result.story_id}`)
    }
  } catch (err) {
    error.value = err instanceof Error ? err.message : '保存语音共创故事失败'
  } finally {
    finalizing.value = false
  }
}
|
||||||
|
|
||||||
|
/**
 * Abandon the active session, refresh the session list and merge the
 * returned summary into the open session.
 *
 * The merge is applied only when the open session is still the one that was
 * abandoned, so another session opened while the requests were in flight is
 * never overwritten with the abandoned session's fields. Does nothing
 * without an active session; failures are reported through `error`.
 */
async function abandonSession() {
  const session = activeSession.value
  if (!session) return

  abandoning.value = true
  error.value = ''
  try {
    const summary = await api.post<VoiceSessionSummary>(
      `/api/voice-sessions/${session.id}/abandon`,
      { reason: '用户在语音共创页主动结束会话' },
    )
    await loadSessions()
    const current = activeSession.value
    if (current && current.id === summary.id) {
      // Keep the turns and events; overwrite the summary fields.
      activeSession.value = { ...current, ...summary }
    }
  } catch (err) {
    error.value = err instanceof Error ? err.message : '放弃会话失败'
  } finally {
    abandoning.value = false
  }
}
|
||||||
|
|
||||||
|
/**
 * Start recording from the microphone.
 *
 * Discards any previous recording, requests an audio stream and starts a
 * MediaRecorder on it. When the recorder stops, the collected chunks become
 * the recorded blob and a preview URL is created for it. The displayed
 * duration is refreshed every 200 ms while recording.
 *
 * The stop handler reads the MIME type from its own recorder instance, so
 * the blob keeps the recorder's real type even after the shared
 * `mediaRecorder` variable has been cleared. If setup fails after the stream
 * was obtained, the stream is released so the microphone does not stay open.
 * Failures are reported through `mediaError`.
 */
async function startRecording() {
  mediaError.value = ''
  if (!recorderSupported.value) {
    mediaError.value = '当前浏览器不支持录音,请先使用文本共创模式。'
    return
  }
  try {
    clearRecordedAudio()
    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true })
    const recorder = new MediaRecorder(mediaStream)
    // Each recording owns its chunk list so recordings never mix.
    const chunks: Blob[] = []
    mediaRecorder = recorder
    recordingChunks = chunks
    recorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        chunks.push(event.data)
      }
    }
    recorder.onstop = () => {
      const blob = new Blob(chunks, { type: recorder.mimeType || 'audio/webm' })
      recordedBlob.value = blob
      revokeRecordedAudioUrl()
      recordedAudioUrl.value = URL.createObjectURL(blob)
    }
    recorder.start()
    recording.value = true
    recordingStartedAt = Date.now()
    recordingTimer = window.setInterval(() => {
      recordingDurationMs.value = Date.now() - recordingStartedAt
    }, 200)
  } catch (err) {
    // Release a stream acquired before the failure.
    if (mediaStream) {
      mediaStream.getTracks().forEach((track) => track.stop())
      mediaStream = null
    }
    mediaRecorder = null
    mediaError.value = err instanceof Error ? err.message : '无法访问麦克风'
  }
}
|
||||||
|
|
||||||
|
/**
 * Stop the running recording, stop the duration timer and release the
 * microphone stream. Does nothing when no recording is in progress.
 *
 * The final duration is stored at stop time instead of relying on the last
 * timer tick. A recorder that is already inactive is not stopped again,
 * because `stop()` throws in that state, and the stream is released even if
 * stopping fails.
 */
function stopRecording() {
  if (!mediaRecorder || !recording.value) return

  const recorder = mediaRecorder
  mediaRecorder = null
  recording.value = false
  recordingDurationMs.value = Date.now() - recordingStartedAt
  if (recordingTimer) {
    window.clearInterval(recordingTimer)
    recordingTimer = null
  }
  try {
    if (recorder.state !== 'inactive') {
      recorder.stop()
    }
  } finally {
    if (mediaStream) {
      mediaStream.getTracks().forEach((track) => track.stop())
      mediaStream = null
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Discard the recording, stopping the recorder first when it is running.
 *
 * The recorder's handlers are detached before stopping. The stop event is
 * delivered asynchronously, so without this the handler would store the
 * discarded audio again right after it was cleared.
 */
function resetRecording() {
  if (recording.value) {
    if (mediaRecorder) {
      mediaRecorder.ondataavailable = null
      mediaRecorder.onstop = null
    }
    stopRecording()
  }
  clearRecordedAudio()
}
|
||||||
|
|
||||||
|
// Keep the universe list in step with the selected profile: no profile means
// no universes and no universe selection; otherwise load that profile's list.
watch(selectedProfileId, (profileId) => {
  if (!profileId) {
    universes.value = []
    selectedUniverseId.value = ''
    return
  }
  void fetchUniverses(profileId)
})
|
||||||
|
|
||||||
|
// On mount: make sure the user session is known, then load profiles and
// sessions. Nothing is loaded when the visitor is still not signed in.
onMounted(async () => {
  if (!userStore.user) {
    await userStore.fetchSession()
  }
  if (userStore.user) {
    await fetchProfiles()
    await loadSessions()
  }
})
|
||||||
|
|
||||||
|
// Before unmount: stop any running recording and discard recorded audio.
// The recorder's handlers are detached first because the stop event arrives
// asynchronously; otherwise it would create a preview URL after the component
// is gone and that URL would never be revoked.
onBeforeUnmount(() => {
  if (recording.value) {
    if (mediaRecorder) {
      mediaRecorder.ondataavailable = null
      mediaRecorder.onstop = null
    }
    stopRecording()
  }
  clearRecordedAudio()
})
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
|
||||||
|
<div class="max-w-7xl mx-auto space-y-8">
|
||||||
|
<div class="flex flex-col gap-4 lg:flex-row lg:items-center lg:justify-between">
|
||||||
|
<div>
|
||||||
|
<h1 class="text-3xl font-bold gradient-text">语音共创工作台</h1>
|
||||||
|
<p class="mt-2 text-gray-500">
|
||||||
|
第一阶段先跑通回合式共创:孩子说一句,系统接一句,再决定是否保存为正式故事。
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<div class="flex flex-wrap gap-3">
|
||||||
|
<BaseButton variant="secondary" @click="loadSessions" :loading="loadingSessions">
|
||||||
|
<ArrowPathIcon class="h-5 w-5" />
|
||||||
|
刷新会话
|
||||||
|
</BaseButton>
|
||||||
|
<BaseButton @click="createSession" :loading="creatingSession">
|
||||||
|
<SparklesIcon class="h-5 w-5" />
|
||||||
|
开始新会话
|
||||||
|
</BaseButton>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div v-if="!userStore.user" class="py-12">
|
||||||
|
<EmptyState
|
||||||
|
:icon="BookOpenIcon"
|
||||||
|
title="需要先登录"
|
||||||
|
description="登录后才能使用语音共创工作台。"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<template v-else>
|
||||||
|
<div class="grid grid-cols-1 gap-6 xl:grid-cols-[320px_minmax(0,1fr)]">
|
||||||
|
<div class="space-y-6">
|
||||||
|
<BaseCard>
|
||||||
|
<div class="space-y-4">
|
||||||
|
<div>
|
||||||
|
<h2 class="text-lg font-semibold text-gray-900">创建条件</h2>
|
||||||
|
<p class="mt-1 text-sm text-gray-500">
|
||||||
|
先选孩子档案与故事宇宙,让共创更容易复用现有角色和世界观。
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<BaseSelect
|
||||||
|
v-model="selectedProfileId"
|
||||||
|
label="孩子档案"
|
||||||
|
:options="profileOptions"
|
||||||
|
placeholder="请选择孩子档案"
|
||||||
|
/>
|
||||||
|
<BaseSelect
|
||||||
|
v-model="selectedUniverseId"
|
||||||
|
label="故事宇宙"
|
||||||
|
:options="universeOptions"
|
||||||
|
placeholder="可选,默认不绑定宇宙"
|
||||||
|
/>
|
||||||
|
<BaseButton class="w-full" @click="createSession" :loading="creatingSession">
|
||||||
|
创建语音共创会话
|
||||||
|
</BaseButton>
|
||||||
|
</div>
|
||||||
|
</BaseCard>
|
||||||
|
|
||||||
|
<BaseCard>
|
||||||
|
<div class="flex items-center justify-between">
|
||||||
|
<div>
|
||||||
|
<h2 class="text-lg font-semibold text-gray-900">最近会话</h2>
|
||||||
|
<p class="mt-1 text-sm text-gray-500">支持恢复最近还在等待下一轮的 session。</p>
|
||||||
|
</div>
|
||||||
|
<span class="text-xs text-gray-400">{{ sessions.length }} 个</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div v-if="loadingSessions" class="py-8">
|
||||||
|
<LoadingSpinner text="加载会话中..." />
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div v-else-if="sessions.length === 0" class="pt-6">
|
||||||
|
<EmptyState
|
||||||
|
:icon="SparklesIcon"
|
||||||
|
title="还没有语音共创会话"
|
||||||
|
description="先创建一个会话,再通过文本或录音开始第一轮故事。"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div v-else class="mt-4 space-y-3">
|
||||||
|
<button
|
||||||
|
v-for="session in sessions"
|
||||||
|
:key="session.id"
|
||||||
|
type="button"
|
||||||
|
class="w-full rounded-2xl border px-4 py-3 text-left transition-all"
|
||||||
|
:class="activeSession?.id === session.id
|
||||||
|
? 'border-purple-300 bg-purple-50'
|
||||||
|
: 'border-gray-100 bg-white hover:border-gray-300'"
|
||||||
|
@click="loadSessionDetail(session.id)"
|
||||||
|
>
|
||||||
|
<div class="flex items-center justify-between gap-3">
|
||||||
|
<div class="min-w-0">
|
||||||
|
<div class="truncate font-medium text-gray-900">
|
||||||
|
{{ session.working_title || '未命名语音会话' }}
|
||||||
|
</div>
|
||||||
|
<div class="mt-1 text-xs text-gray-500">
|
||||||
|
{{ formatSessionStatus(session.status) }} · {{ session.total_turns }} 轮
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="text-right text-xs text-gray-400">
|
||||||
|
{{ formatDate(session.updated_at) }}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</BaseCard>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="space-y-6">
|
||||||
|
<BaseCard v-if="error" class="border border-rose-100 bg-rose-50 text-rose-600">
|
||||||
|
<div class="flex items-start gap-3">
|
||||||
|
<ExclamationCircleIcon class="mt-0.5 h-5 w-5 shrink-0" />
|
||||||
|
<div>{{ error }}</div>
|
||||||
|
</div>
|
||||||
|
</BaseCard>
|
||||||
|
|
||||||
|
<div v-if="loadingSessionDetail" class="py-16">
|
||||||
|
<LoadingSpinner text="正在加载会话详情..." />
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<BaseCard v-else-if="activeSession">
|
||||||
|
<div class="flex flex-col gap-4 lg:flex-row lg:items-start lg:justify-between">
|
||||||
|
<div>
|
||||||
|
<div class="flex flex-wrap items-center gap-2">
|
||||||
|
<h2 class="text-2xl font-semibold text-gray-900">
|
||||||
|
{{ activeSession.working_title || '语音共创会话' }}
|
||||||
|
</h2>
|
||||||
|
<span class="rounded-full bg-purple-100 px-3 py-1 text-xs font-medium text-purple-700">
|
||||||
|
{{ formatSessionStatus(activeSession.status) }}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<p class="mt-2 text-sm text-gray-500">
|
||||||
|
最近意图:{{ formatIntent(activeSession.latest_detected_intent) }} ·
|
||||||
|
已完成 {{ activeSession.total_turns }} 轮
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="flex flex-wrap gap-3">
|
||||||
|
<BaseButton
|
||||||
|
variant="secondary"
|
||||||
|
@click="finalizeSession"
|
||||||
|
:loading="finalizing"
|
||||||
|
:disabled="!activeSession.can_finalize"
|
||||||
|
>
|
||||||
|
<BookOpenIcon class="h-5 w-5" />
|
||||||
|
保存为正式故事
|
||||||
|
</BaseButton>
|
||||||
|
<BaseButton
|
||||||
|
variant="ghost"
|
||||||
|
class="text-rose-500 hover:bg-rose-50"
|
||||||
|
@click="abandonSession"
|
||||||
|
:loading="abandoning"
|
||||||
|
:disabled="activeSession.status === 'completed' || activeSession.status === 'abandoned'"
|
||||||
|
>
|
||||||
|
<XMarkIcon class="h-5 w-5" />
|
||||||
|
放弃会话
|
||||||
|
</BaseButton>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="mt-6 grid grid-cols-1 gap-6 xl:grid-cols-[minmax(0,1.2fr)_minmax(0,0.8fr)]">
|
||||||
|
<div class="space-y-6">
|
||||||
|
<div class="rounded-2xl border border-gray-100 bg-white p-4">
|
||||||
|
<div class="flex items-center justify-between">
|
||||||
|
<h3 class="font-semibold text-gray-900">文本共创回合</h3>
|
||||||
|
<span class="text-xs text-gray-400">最稳的 fallback 路径</span>
|
||||||
|
</div>
|
||||||
|
<div class="mt-4 space-y-4">
|
||||||
|
<BaseTextarea
|
||||||
|
v-model="textTurnInput"
|
||||||
|
label="本轮你想让故事怎么发展"
|
||||||
|
placeholder="例如:不要让它害怕,我想让它遇见一个新朋友。"
|
||||||
|
:rows="4"
|
||||||
|
:max-length="1000"
|
||||||
|
:disabled="sendingTurn || !activeSession.can_continue"
|
||||||
|
/>
|
||||||
|
<BaseButton
|
||||||
|
@click="submitTextTurn"
|
||||||
|
:loading="sendingTurn"
|
||||||
|
:disabled="!activeSession.can_continue || !textTurnInput.trim()"
|
||||||
|
>
|
||||||
|
<PaperAirplaneIcon class="h-5 w-5" />
|
||||||
|
发送文本回合
|
||||||
|
</BaseButton>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="rounded-2xl border border-gray-100 bg-white p-4">
|
||||||
|
<div class="flex items-center justify-between">
|
||||||
|
<h3 class="font-semibold text-gray-900">录音共创回合</h3>
|
||||||
|
<span class="text-xs text-gray-400">已支持上传音频 turn</span>
|
||||||
|
</div>
|
||||||
|
<p class="mt-2 text-sm text-gray-500">
|
||||||
|
当前默认是 demo 转写模式。若本地未接真实 ASR,可在下方填写转写提示辅助开发验证。
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<div class="mt-4 flex flex-wrap gap-3">
|
||||||
|
<BaseButton
|
||||||
|
v-if="!recording"
|
||||||
|
variant="secondary"
|
||||||
|
@click="startRecording"
|
||||||
|
:disabled="sendingTurn || !activeSession.can_continue"
|
||||||
|
>
|
||||||
|
<MicrophoneIcon class="h-5 w-5" />
|
||||||
|
开始录音
|
||||||
|
</BaseButton>
|
||||||
|
<BaseButton
|
||||||
|
v-else
|
||||||
|
variant="danger"
|
||||||
|
@click="stopRecording"
|
||||||
|
>
|
||||||
|
<StopIcon class="h-5 w-5" />
|
||||||
|
停止录音
|
||||||
|
</BaseButton>
|
||||||
|
|
||||||
|
<BaseButton
|
||||||
|
variant="ghost"
|
||||||
|
@click="resetRecording"
|
||||||
|
:disabled="(!recordedBlob && !recording) || sendingTurn"
|
||||||
|
>
|
||||||
|
<XMarkIcon class="h-5 w-5" />
|
||||||
|
清空录音
|
||||||
|
</BaseButton>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<p v-if="recording" class="mt-3 text-sm text-amber-600">
|
||||||
|
正在录音:{{ Math.round(recordingDurationMs / 1000) }}s
|
||||||
|
</p>
|
||||||
|
<p v-if="mediaError" class="mt-3 text-sm text-rose-600">
|
||||||
|
{{ mediaError }}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<div v-if="recordedAudioUrl" class="mt-4 rounded-2xl border border-gray-100 bg-gray-50 p-4">
|
||||||
|
<div class="text-sm font-medium text-gray-700">录音预览</div>
|
||||||
|
<audio class="mt-3 w-full" :src="recordedAudioUrl" controls></audio>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="mt-4">
|
||||||
|
<BaseTextarea
|
||||||
|
v-model="uploadTranscriptHint"
|
||||||
|
label="开发转写提示(可选)"
|
||||||
|
placeholder="如果当前环境还是 demo 转写模式,可以把你刚才说的话写在这里。"
|
||||||
|
:rows="3"
|
||||||
|
:max-length="1000"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="mt-4">
|
||||||
|
<BaseButton
|
||||||
|
@click="submitRecordedTurn"
|
||||||
|
:loading="sendingTurn"
|
||||||
|
:disabled="!activeSession.can_continue || !recordedBlob"
|
||||||
|
>
|
||||||
|
<SparklesIcon class="h-5 w-5" />
|
||||||
|
上传录音回合
|
||||||
|
</BaseButton>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="rounded-2xl border border-gray-100 bg-white p-4">
|
||||||
|
<div class="flex items-center justify-between">
|
||||||
|
<h3 class="font-semibold text-gray-900">共创过程</h3>
|
||||||
|
<span class="text-xs text-gray-400">{{ activeTurnList.length }} 条最近 turn</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="mt-4 space-y-4">
|
||||||
|
<div
|
||||||
|
v-for="turn in activeTurnList"
|
||||||
|
:key="turn.id"
|
||||||
|
class="rounded-2xl border border-gray-100 bg-gray-50 p-4"
|
||||||
|
>
|
||||||
|
<div class="flex flex-wrap items-center gap-2 text-xs text-gray-400">
|
||||||
|
<span>第 {{ turn.turn_index }} 轮</span>
|
||||||
|
<span>·</span>
|
||||||
|
<span>{{ formatTurnStatus(turn.status) }}</span>
|
||||||
|
<span>·</span>
|
||||||
|
<span>{{ formatIntent(turn.detected_intent) }}</span>
|
||||||
|
<span v-if="turn.transcription_provider">· {{ turn.transcription_provider }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="mt-3 text-sm text-gray-800">
|
||||||
|
<span class="font-medium text-gray-900">孩子:</span>
|
||||||
|
{{ turn.user_transcript || '暂无转写内容' }}
|
||||||
|
</div>
|
||||||
|
<div v-if="turn.user_audio_url" class="mt-3">
|
||||||
|
<audio class="w-full" :src="turn.user_audio_url" controls></audio>
|
||||||
|
</div>
|
||||||
|
<div v-if="turn.assistant_text" class="mt-4 text-sm text-gray-700">
|
||||||
|
<span class="font-medium text-purple-700">织机回应:</span>
|
||||||
|
{{ turn.assistant_text }}
|
||||||
|
</div>
|
||||||
|
<div v-if="turn.assistant_audio_url" class="mt-3">
|
||||||
|
<audio class="w-full" :src="turn.assistant_audio_url" controls></audio>
|
||||||
|
</div>
|
||||||
|
<div v-if="turn.error_message" class="mt-3 text-sm text-rose-600">
|
||||||
|
{{ turn.error_message }}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<EmptyState
|
||||||
|
v-if="activeTurnList.length === 0"
|
||||||
|
:icon="MicrophoneIcon"
|
||||||
|
title="从第一轮开始"
|
||||||
|
description="先发送一句文本或录音,让这个会话拥有第一段故事。"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="space-y-6">
|
||||||
|
<div class="rounded-2xl border border-gray-100 bg-white p-4">
|
||||||
|
<h3 class="font-semibold text-gray-900">故事状态快照</h3>
|
||||||
|
<pre class="mt-4 overflow-x-auto rounded-xl bg-gray-950 p-4 text-xs leading-6 text-emerald-200">{{ JSON.stringify(activeSession.story_state, null, 2) }}</pre>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="rounded-2xl border border-gray-100 bg-white p-4">
|
||||||
|
<h3 class="font-semibold text-gray-900">最近事件</h3>
|
||||||
|
<div class="mt-4 space-y-3">
|
||||||
|
<div
|
||||||
|
v-for="event in activeSession.events.slice(-10)"
|
||||||
|
:key="event.id"
|
||||||
|
class="rounded-xl border border-gray-100 bg-gray-50 px-3 py-3"
|
||||||
|
>
|
||||||
|
<div class="flex items-center justify-between gap-3 text-xs text-gray-400">
|
||||||
|
<span>{{ event.event_type }}</span>
|
||||||
|
<span>{{ formatDate(event.created_at) }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="mt-1 text-sm font-medium text-gray-800">
|
||||||
|
{{ event.message || event.status }}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</BaseCard>
|
||||||
|
|
||||||
|
<div v-else class="py-16">
|
||||||
|
<EmptyState
|
||||||
|
:icon="SparklesIcon"
|
||||||
|
title="创建或恢复一个语音共创会话"
|
||||||
|
description="左侧可以直接创建新会话,也可以恢复最近仍在等待下一轮的 session。"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
Reference in New Issue
Block a user