feat: improve voice studio alpha recovery flow

This commit is contained in:
2026-04-19 23:25:41 +08:00
parent 46d6201529
commit 4ecf0c09c0
9 changed files with 657 additions and 14 deletions

View File

@@ -12,6 +12,7 @@ from fastapi import (
) )
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.deps import require_user from app.core.deps import require_user
from app.core.rate_limiter import check_rate_limit from app.core.rate_limiter import check_rate_limit
from app.db.database import get_db from app.db.database import get_db
@@ -34,11 +35,14 @@ from app.services.voice_session_service import (
create_voice_turn_from_text_service, create_voice_turn_from_text_service,
create_voice_turn_from_upload_service, create_voice_turn_from_upload_service,
finalize_voice_session_service, finalize_voice_session_service,
get_latest_active_voice_session_service,
get_voice_session_detail_service, get_voice_session_detail_service,
get_voice_turn_audio_service, get_voice_turn_audio_service,
get_voice_turn_service, get_voice_turn_service,
get_voice_turn_user_audio_service, get_voice_turn_user_audio_service,
list_voice_sessions_service, list_voice_sessions_service,
retry_voice_turn_audio_service,
retry_voice_turn_service,
) )
router = APIRouter() router = APIRouter()
@@ -68,8 +72,13 @@ async def create_voice_session(
@router.get("/voice-sessions", response_model=list[VoiceSessionSummaryResponse]) @router.get("/voice-sessions", response_model=list[VoiceSessionSummaryResponse])
async def list_voice_sessions( async def list_voice_sessions(
limit: int = Query(default=8, ge=1, le=20), limit: int = Query(
default=settings.voice_session_default_list_limit,
ge=1,
le=settings.voice_session_max_list_limit,
),
active_only: bool = Query(default=False), active_only: bool = Query(default=False),
active_first: bool = Query(default=True),
user: User = Depends(require_user), user: User = Depends(require_user),
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
@@ -79,9 +88,19 @@ async def list_voice_sessions(
db, db,
limit=limit, limit=limit,
active_only=active_only, active_only=active_only,
active_first=active_first,
) )
@router.get("/voice-sessions/active", response_model=VoiceSessionSummaryResponse | None)
async def get_latest_active_voice_session(
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Get the latest active voice session for quick resume behavior."""
    # The service yields None when the user has no continuable session; that
    # value is handed back to the client unchanged.
    latest_active = await get_latest_active_voice_session_service(user.id, db)
    return latest_active
@router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse) @router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse)
async def get_voice_session( async def get_voice_session(
session_id: str, session_id: str,
@@ -158,6 +177,21 @@ async def get_voice_turn(
return await get_voice_turn_service(session_id, turn_id, user.id, db) return await get_voice_turn_service(session_id, turn_id, user.id, db)
@router.post(
    "/voice-sessions/{session_id}/turns/{turn_id}/retry",
    response_model=VoiceTurnAcceptedResponse,
    status_code=status.HTTP_202_ACCEPTED,
)
async def retry_voice_turn(
    session_id: str,
    turn_id: str,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Retry one failed voice turn using its saved transcript."""
    # Ownership and "is this turn retryable" checks live in the service layer;
    # this handler only forwards the path parameters and the caller's id.
    accepted = await retry_voice_turn_service(session_id, turn_id, user.id, db)
    return accepted
@router.get("/voice-sessions/{session_id}/turns/{turn_id}/audio") @router.get("/voice-sessions/{session_id}/turns/{turn_id}/audio")
async def get_voice_turn_audio( async def get_voice_turn_audio(
session_id: str, session_id: str,
@@ -170,6 +204,20 @@ async def get_voice_turn_audio(
return Response(content=audio_bytes, media_type="audio/mpeg") return Response(content=audio_bytes, media_type="audio/mpeg")
@router.post(
    "/voice-sessions/{session_id}/turns/{turn_id}/retry-audio",
    response_model=VoiceTurnSummaryResponse,
)
async def retry_voice_turn_audio(
    session_id: str,
    turn_id: str,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Retry assistant audio synthesis when one turn only has text output."""
    # Synthesis, storage and event logging are handled by the service; the
    # updated turn summary it returns becomes the response body.
    refreshed_turn = await retry_voice_turn_audio_service(session_id, turn_id, user.id, db)
    return refreshed_turn
@router.get("/voice-sessions/{session_id}/turns/{turn_id}/user-audio") @router.get("/voice-sessions/{session_id}/turns/{turn_id}/user-audio")
async def get_voice_turn_user_audio( async def get_voice_turn_user_audio(
session_id: str, session_id: str,

View File

@@ -82,6 +82,18 @@ class Settings(BaseSettings):
"zh", "zh",
description="Preferred language hint for voice transcription", description="Preferred language hint for voice transcription",
) )
# Size cap (5 MiB by default) for one uploaded voice-turn audio file. The upload
# service compares len(audio_bytes) against this value and answers HTTP 413
# when the upload is larger.
voice_turn_max_upload_bytes: int = Field(
5 * 1024 * 1024,
description="Maximum accepted upload size in bytes for one voice turn audio file",
)
# Default for the `limit` query parameter of GET /voice-sessions; the list
# service also falls back to it when no limit is passed.
voice_session_default_list_limit: int = Field(
8,
description="Default number of recent voice sessions returned to the client",
)
# Upper bound (`le`) for the `limit` query parameter of GET /voice-sessions; the
# list service clamps its resolved limit to this value as well.
voice_session_max_list_limit: int = Field(
20,
description="Maximum number of recent voice sessions returned to the client",
)
story_audio_cache_ttl_days: int = Field( story_audio_cache_ttl_days: int = Field(
30, 30,
description="TTL in days before cached story audio is pruned", description="TTL in days before cached story audio is pruned",

View File

@@ -101,6 +101,7 @@ class VoiceSessionSummaryResponse(BaseModel):
latest_detected_intent: str | None = None latest_detected_intent: str | None = None
latest_assistant_audio_ready: bool = False latest_assistant_audio_ready: bool = False
last_turn_status: str | None = None last_turn_status: str | None = None
transcription_mode_hint: str | None = None
can_continue: bool = False can_continue: bool = False
can_finalize: bool = False can_finalize: bool = False
last_error: str | None = None last_error: str | None = None

View File

@@ -6,9 +6,10 @@ from datetime import datetime, timezone
from typing import Any from typing import Any
from fastapi import HTTPException from fastapi import HTTPException
from sqlalchemy import desc, select from sqlalchemy import case, desc, select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.logging import get_logger from app.core.logging import get_logger
from app.db.models import VoiceSession, VoiceSessionEvent, VoiceTurn from app.db.models import VoiceSession, VoiceSessionEvent, VoiceTurn
from app.schemas.voice_session_schemas import ( from app.schemas.voice_session_schemas import (
@@ -134,6 +135,7 @@ def _session_to_summary(
session_audio_exists(latest_turn.assistant_audio_path) if latest_turn else False session_audio_exists(latest_turn.assistant_audio_path) if latest_turn else False
), ),
last_turn_status=latest_turn.status if latest_turn else None, last_turn_status=latest_turn.status if latest_turn else None,
transcription_mode_hint=settings.voice_transcription_mode,
can_continue=_session_can_continue(session), can_continue=_session_can_continue(session),
can_finalize=_session_can_finalize(session), can_finalize=_session_can_finalize(session),
last_error=session.last_error, last_error=session.last_error,
@@ -602,17 +604,29 @@ async def list_voice_sessions_service(
user_id: str, user_id: str,
db: AsyncSession, db: AsyncSession,
*, *,
limit: int = 8, limit: int | None = None,
active_only: bool = False, active_only: bool = False,
active_first: bool = False,
) -> list[VoiceSessionSummaryResponse]: ) -> list[VoiceSessionSummaryResponse]:
query = ( resolved_limit = limit or settings.voice_session_default_list_limit
select(VoiceSession) resolved_limit = max(1, min(resolved_limit, settings.voice_session_max_list_limit))
.where(VoiceSession.user_id == user_id) query = select(VoiceSession).where(VoiceSession.user_id == user_id)
.order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
.limit(limit)
)
if active_only: if active_only:
query = query.where(VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES)) query = query.where(VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES))
if active_first:
query = query.order_by(
desc(
case(
(VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES), 1),
else_=0,
)
),
desc(VoiceSession.updated_at),
desc(VoiceSession.created_at),
)
else:
query = query.order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
query = query.limit(resolved_limit)
sessions = (await db.execute(query)).scalars().all() sessions = (await db.execute(query)).scalars().all()
summaries: list[VoiceSessionSummaryResponse] = [] summaries: list[VoiceSessionSummaryResponse] = []
@@ -628,6 +642,30 @@ async def list_voice_sessions_service(
return summaries return summaries
async def get_latest_active_voice_session_service(
    user_id: str,
    db: AsyncSession,
) -> VoiceSessionSummaryResponse | None:
    """Return a summary of the user's most recently updated continuable session.

    Only sessions whose status is in ``CONTINUABLE_SESSION_STATUSES`` are
    considered. Returns ``None`` when the user has no such session.
    """
    owned_by_user = VoiceSession.user_id == user_id
    still_continuable = VoiceSession.status.in_(CONTINUABLE_SESSION_STATUSES)

    stmt = select(VoiceSession).where(owned_by_user, still_continuable)
    # Newest first; created_at breaks ties between equal updated_at values.
    stmt = stmt.order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at))
    stmt = stmt.limit(1)

    result = await db.execute(stmt)
    newest = result.scalar_one_or_none()
    if newest is None:
        return None

    newest_turn = await _get_latest_turn(db, session_id=newest.id)
    return _session_to_summary(
        newest,
        latest_turn=newest_turn,
        total_turns=newest.current_turn_index,
    )
async def create_voice_session_service( async def create_voice_session_service(
request: VoiceSessionCreateRequest, request: VoiceSessionCreateRequest,
user_id: str, user_id: str,
@@ -766,6 +804,13 @@ async def create_voice_turn_from_upload_service(
status_code=409, status_code=409,
detail="Voice session is not ready for another turn.", detail="Voice session is not ready for another turn.",
) )
if not audio_bytes:
raise HTTPException(status_code=400, detail="上传音频为空,请重新录音后再试。")
if len(audio_bytes) > settings.voice_turn_max_upload_bytes:
raise HTTPException(
status_code=413,
detail="上传音频过大,请缩短单轮录音时长后再试。",
)
next_turn_index = session.current_turn_index + 1 next_turn_index = session.current_turn_index + 1
user_audio_path = write_uploaded_user_audio( user_audio_path = write_uploaded_user_audio(
session_id=session.id, session_id=session.id,
@@ -805,6 +850,91 @@ async def create_voice_turn_from_upload_service(
) )
async def retry_voice_turn_service(
    session_id: str,
    turn_id: str,
    user_id: str,
    db: AsyncSession,
) -> VoiceTurnAcceptedResponse:
    """Replay a failed turn by submitting its saved transcript as a new text turn.

    Raises:
        HTTPException: 409 when the turn is not in the ``failed`` state or has
            no stored transcript to replay.
    """
    failed_turn = await _get_owned_turn(
        db,
        session_id=session_id,
        turn_id=turn_id,
        user_id=user_id,
    )

    if failed_turn.status != "failed":
        raise HTTPException(status_code=409, detail="Only failed turns can be retried.")

    saved_transcript = failed_turn.user_transcript
    if not saved_transcript:
        raise HTTPException(status_code=409, detail="This turn has no transcript to retry.")

    # The retry is a brand-new turn created through the text fallback path,
    # carrying over the original transcript and recorded duration.
    replay_request = VoiceTurnCreateFallbackRequest(
        transcript_text=saved_transcript,
        duration_ms=failed_turn.user_audio_duration_ms,
    )
    return await create_voice_turn_from_text_service(
        session_id,
        replay_request,
        user_id,
        db,
    )
async def retry_voice_turn_audio_service(
    session_id: str,
    turn_id: str,
    user_id: str,
    db: AsyncSession,
) -> VoiceTurnSummaryResponse:
    """Regenerate assistant audio for a turn that has text but no audio file.

    On success the audio path is stored on the turn, a ``narrative_ready``
    turn is promoted to ``audio_ready``, and a success event is recorded.

    Raises:
        HTTPException: 409 when the turn has no assistant text or its audio
            already exists; 503 when synthesis, storage or the commit fails
            (a failure event is recorded first).
    """
    turn = await _get_owned_turn(
        db,
        session_id=session_id,
        turn_id=turn_id,
        user_id=user_id,
    )
    if not turn.assistant_text:
        raise HTTPException(status_code=409, detail="This turn has no assistant text to speak.")
    if session_audio_exists(turn.assistant_audio_path):
        raise HTTPException(status_code=409, detail="Assistant audio already exists for this turn.")

    # Snapshot identifiers up front: after a rollback the ORM instance is
    # expired, so the failure handler must not touch ``turn`` attributes.
    owning_session_id = turn.session_id
    owning_turn_id = turn.id
    turn_index = turn.turn_index

    try:
        audio_bytes = await text_to_speech(
            turn.assistant_text,
            db=db,
            user_id=user_id,
        )
        saved_path = write_session_audio(
            build_turn_assistant_audio_path(owning_session_id, turn_index),
            audio_bytes,
        )
        turn.assistant_audio_path = saved_path
        turn.assistant_audio_duration_ms = None
        if turn.status == "narrative_ready":
            turn.status = "audio_ready"
        await db.commit()
        await db.refresh(turn)
    except Exception as exc:
        # Discard any half-applied changes so the session is usable again
        # before the failure event is written.
        await db.rollback()
        await _record_session_event(
            db,
            session_id=owning_session_id,
            turn_id=owning_turn_id,
            event_type="assistant_audio_retry_failed",
            status="failed",
            message="Assistant audio retry failed.",
            metadata={"error": str(exc)},
        )
        raise HTTPException(status_code=503, detail="语音补发失败,请稍后再试。") from exc
    else:
        # Recorded outside the guarded region: once the audio is committed, a
        # problem while logging the event must not be reported as a failed
        # retry (the client would retry and hit the 409 "already exists").
        await _record_session_event(
            db,
            session_id=owning_session_id,
            turn_id=owning_turn_id,
            event_type="assistant_audio_retry_succeeded",
            status="succeeded",
            message="Assistant audio regenerated for one voice turn.",
            metadata={"audio_path": saved_path},
        )
    return _turn_to_summary(turn)
async def get_voice_turn_service( async def get_voice_turn_service(
session_id: str, session_id: str,
turn_id: str, turn_id: str,

View File

@@ -2,6 +2,7 @@ from unittest.mock import AsyncMock, patch
from httpx import ASGITransport, AsyncClient from httpx import ASGITransport, AsyncClient
from app.core.config import settings
from app.db.database import get_db from app.db.database import get_db
from app.main import app from app.main import app
from app.services.adapters.text.models import StoryOutput from app.services.adapters.text.models import StoryOutput
@@ -343,3 +344,221 @@ async def test_voice_session_list_orders_recent_sessions_first(
} }
finally: finally:
app.dependency_overrides.clear() app.dependency_overrides.clear()
async def test_voice_session_active_endpoint_returns_latest_active_session(
    db_session,
    auth_token,
):
    """GET /api/voice-sessions/active skips an abandoned session and returns the live one."""

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db

    story_patch = patch(
        "app.services.voice_session_service.generate_story_content",
        new_callable=AsyncMock,
    )
    tts_patch = patch(
        "app.services.voice_session_service.text_to_speech",
        new_callable=AsyncMock,
    )
    with story_patch as mock_generate, tts_patch as mock_tts:
        mock_generate.return_value = StoryOutput(
            mode="generated",
            title="活动会话",
            story_text="一段活动中的故事。",
            cover_prompt_suggestion="活动会话封面",
        )
        mock_tts.return_value = b"active-audio"

        transport = ASGITransport(app=app)
        try:
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                client.cookies.set("access_token", auth_token)

                # First session: created, then abandoned so it is no longer resumable.
                created = await client.post("/api/voice-sessions", json={})
                abandoned_id = created.json()["id"]
                await client.post(
                    f"/api/voice-sessions/{abandoned_id}/abandon",
                    json={"reason": "旧会话结束"},
                )

                # Second session: receives one text turn and stays active.
                created = await client.post("/api/voice-sessions", json={})
                live_id = created.json()["id"]
                await client.post(
                    f"/api/voice-sessions/{live_id}/turns/fallback",
                    json={"transcript_text": "请继续一个新故事"},
                )

                active_response = await client.get("/api/voice-sessions/active")
                assert active_response.status_code == 200
                payload = active_response.json()
                assert payload["id"] == live_id
                assert payload["can_continue"] is True
                assert payload["status"] == "waiting_user"
        finally:
            app.dependency_overrides.clear()
async def test_voice_session_can_retry_failed_turn_from_saved_transcript(
    db_session,
    auth_token,
):
    """A failed turn can be retried and yields a new, successful turn."""

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db

    story_patch = patch(
        "app.services.voice_session_service.generate_story_content",
        new_callable=AsyncMock,
    )
    tts_patch = patch(
        "app.services.voice_session_service.text_to_speech",
        new_callable=AsyncMock,
    )
    with story_patch as mock_generate, tts_patch as mock_tts:
        # First generation call fails, the retry succeeds.
        mock_generate.side_effect = [
            RuntimeError("provider down"),
            StoryOutput(
                mode="generated",
                title="重试成功",
                story_text="重试后的故事终于顺利继续了。",
                cover_prompt_suggestion="重试封面",
            ),
        ]
        mock_tts.return_value = b"retry-turn-audio"

        transport = ASGITransport(app=app)
        try:
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                client.cookies.set("access_token", auth_token)

                created = await client.post("/api/voice-sessions", json={})
                session_id = created.json()["id"]
                turns_url = f"/api/voice-sessions/{session_id}/turns"

                first_attempt = await client.post(
                    f"{turns_url}/fallback",
                    json={"transcript_text": "先讲一个会失败的回合"},
                )
                assert first_attempt.status_code == 202
                failed_turn_id = first_attempt.json()["turn_id"]

                failed_lookup = await client.get(f"{turns_url}/{failed_turn_id}")
                assert failed_lookup.status_code == 200
                assert failed_lookup.json()["status"] == "failed"

                retry_response = await client.post(f"{turns_url}/{failed_turn_id}/retry")
                assert retry_response.status_code == 202
                retried_turn_id = retry_response.json()["turn_id"]
                assert retried_turn_id != failed_turn_id

                retried_lookup = await client.get(f"{turns_url}/{retried_turn_id}")
                assert retried_lookup.status_code == 200
                retried_turn = retried_lookup.json()
                assert retried_turn["status"] == "audio_ready"
                assert retried_turn["assistant_text"] == "重试后的故事终于顺利继续了。"
        finally:
            app.dependency_overrides.clear()
async def test_voice_session_can_retry_missing_assistant_audio(
    db_session,
    auth_token,
):
    """A text-only turn gains audio after calling the retry-audio endpoint."""

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db

    story_patch = patch(
        "app.services.voice_session_service.generate_story_content",
        new_callable=AsyncMock,
    )
    tts_patch = patch(
        "app.services.voice_session_service.text_to_speech",
        new_callable=AsyncMock,
    )
    with story_patch as mock_generate, tts_patch as mock_tts:
        mock_generate.return_value = StoryOutput(
            mode="generated",
            title="补发语音",
            story_text="这一轮先有文本,稍后再补发语音。",
            cover_prompt_suggestion="补发语音封面",
        )
        # TTS fails during the turn itself, then works for the retry.
        mock_tts.side_effect = [RuntimeError("tts down"), b"recovered-audio"]

        transport = ASGITransport(app=app)
        try:
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                client.cookies.set("access_token", auth_token)

                created = await client.post("/api/voice-sessions", json={})
                session_id = created.json()["id"]
                turns_url = f"/api/voice-sessions/{session_id}/turns"

                accepted = await client.post(
                    f"{turns_url}/fallback",
                    json={"transcript_text": "请先给我一段只有文本的结果"},
                )
                assert accepted.status_code == 202
                turn_id = accepted.json()["turn_id"]

                before_retry = await client.get(f"{turns_url}/{turn_id}")
                assert before_retry.status_code == 200
                text_only_turn = before_retry.json()
                assert text_only_turn["status"] == "narrative_ready"
                assert text_only_turn["assistant_audio_ready"] is False

                after_retry = await client.post(f"{turns_url}/{turn_id}/retry-audio")
                assert after_retry.status_code == 200
                recovered_turn = after_retry.json()
                assert recovered_turn["status"] == "audio_ready"
                assert recovered_turn["assistant_audio_ready"] is True
        finally:
            app.dependency_overrides.clear()
async def test_voice_session_uploaded_audio_respects_size_limit(
    db_session,
    auth_token,
    monkeypatch,
):
    """An upload larger than ``voice_turn_max_upload_bytes`` is rejected with 413."""

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db
    # Shrink the cap to 4 bytes so the 5-byte payload below is one byte over.
    monkeypatch.setattr(settings, "voice_turn_max_upload_bytes", 4)

    transport = ASGITransport(app=app)
    try:
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            client.cookies.set("access_token", auth_token)

            created = await client.post("/api/voice-sessions", json={})
            assert created.status_code == 201
            session_id = created.json()["id"]

            oversized_upload = {"audio_file": ("turn.webm", b"12345", "audio/webm")}
            upload_response = await client.post(
                f"/api/voice-sessions/{session_id}/turns",
                files=oversized_upload,
                data={"transcript_hint": "太长了"},
            )
            assert upload_response.status_code == 413
    finally:
        app.dependency_overrides.clear()

View File

@@ -45,6 +45,7 @@ export interface VoiceSessionSummary {
latest_detected_intent: string | null latest_detected_intent: string | null
latest_assistant_audio_ready: boolean latest_assistant_audio_ready: boolean
last_turn_status: string | null last_turn_status: string | null
transcription_mode_hint: string | null
can_continue: boolean can_continue: boolean
can_finalize: boolean can_finalize: boolean
last_error: string | null last_error: string | null

View File

@@ -1,8 +1,10 @@
<script setup lang="ts"> <script setup lang="ts">
import { ref } from 'vue' import { onMounted, ref, watch } from 'vue'
import { useRouter } from 'vue-router' import { useRouter } from 'vue-router'
import { useI18n } from 'vue-i18n' import { useI18n } from 'vue-i18n'
import { useUserStore } from '../stores/user' import { useUserStore } from '../stores/user'
import { api } from '../api/client'
import type { VoiceSessionSummary } from '../types/voiceSession'
import BaseButton from '../components/ui/BaseButton.vue' import BaseButton from '../components/ui/BaseButton.vue'
import LoginDialog from '../components/ui/LoginDialog.vue' import LoginDialog from '../components/ui/LoginDialog.vue'
import { import {
@@ -25,6 +27,7 @@ function switchLocale(lang: 'en' | 'zh') {
// ========== 登录对话框状态 ========== // ========== 登录对话框状态 ==========
const showLoginDialog = ref(false) const showLoginDialog = ref(false)
const activeVoiceSession = ref<VoiceSessionSummary | null>(null)
// ========== 创作入口 ========== // ========== 创作入口 ==========
// 旧的创作变量已移除,现在只负责跳转 // 旧的创作变量已移除,现在只负责跳转
@@ -45,6 +48,26 @@ function openVoiceStudio() {
router.push('/voice-studio') router.push('/voice-studio')
} }
// Resume entry point: with a known active session go straight to the studio,
// otherwise defer to the regular "open voice studio" flow.
function continueVoiceStudio() {
  if (activeVoiceSession.value) {
    router.push('/voice-studio')
    return
  }
  openVoiceStudio()
}
// Refreshes `activeVoiceSession` from the backend. Signed-out users and any
// request failure both end with the value reset to null.
async function loadActiveVoiceSession() {
  let latest: VoiceSessionSummary | null = null
  if (userStore.user) {
    try {
      latest = await api.get<VoiceSessionSummary | null>('/api/voice-sessions/active')
    } catch {
      latest = null
    }
  }
  activeVoiceSession.value = latest
}
function scrollToFeatures() { function scrollToFeatures() {
document.getElementById('features')?.scrollIntoView({ behavior: 'smooth' }) document.getElementById('features')?.scrollIntoView({ behavior: 'smooth' })
} }
@@ -54,6 +77,20 @@ function scrollToFeatures() {
// const familiesCount = 5000 // const familiesCount = 5000
// const satisfactionCount = 99 // const satisfactionCount = 99
// On mount: make sure the user session has been fetched, then look up the
// resumable voice session.
onMounted(async () => {
  const sessionMissing = !userStore.user
  if (sessionMissing) {
    await userStore.fetchSession()
  }
  await loadActiveVoiceSession()
})

// Re-check the resumable voice session whenever the signed-in user changes.
watch(
  () => userStore.user?.id,
  () => {
    void loadActiveVoiceSession()
  },
)
</script> </script>
<template> <template>
@@ -152,6 +189,15 @@ function scrollToFeatures() {
<MicrophoneIcon class="h-5 w-5 mr-2" /> <MicrophoneIcon class="h-5 w-5 mr-2" />
进入语音共创 进入语音共创
</BaseButton> </BaseButton>
<BaseButton
v-if="activeVoiceSession"
size="lg"
variant="ghost"
@click="continueVoiceStudio"
>
<ArrowRightOnRectangleIcon class="h-5 w-5 mr-2" />
继续语音共创
</BaseButton>
<button @click="scrollToFeatures" class="px-6 py-3 rounded-xl font-semibold text-stone-600 bg-white border border-stone-200 hover:border-amber-400 hover:text-amber-700 transition-all shadow-sm"> <button @click="scrollToFeatures" class="px-6 py-3 rounded-xl font-semibold text-stone-600 bg-white border border-stone-200 hover:border-amber-400 hover:text-amber-700 transition-all shadow-sm">
了解更多功能 了解更多功能
</button> </button>

View File

@@ -8,6 +8,7 @@ import BaseCard from '../components/ui/BaseCard.vue'
import EmptyState from '../components/ui/EmptyState.vue' import EmptyState from '../components/ui/EmptyState.vue'
import LoadingSpinner from '../components/ui/LoadingSpinner.vue' import LoadingSpinner from '../components/ui/LoadingSpinner.vue'
import type { GenerationOpsSummary, GenerationProviderAnalytics } from '../types/generation' import type { GenerationOpsSummary, GenerationProviderAnalytics } from '../types/generation'
import type { VoiceSessionSummary } from '../types/voiceSession'
import { import {
getAssetStatusMeta, getAssetStatusMeta,
getGenerationStatusMeta, getGenerationStatusMeta,
@@ -40,6 +41,7 @@ const router = useRouter()
const stories = ref<StoryItem[]>([]) const stories = ref<StoryItem[]>([])
const providerAnalytics = ref<GenerationProviderAnalytics | null>(null) const providerAnalytics = ref<GenerationProviderAnalytics | null>(null)
const opsSummary = ref<GenerationOpsSummary | null>(null) const opsSummary = ref<GenerationOpsSummary | null>(null)
const activeVoiceSession = ref<VoiceSessionSummary | null>(null)
const loading = ref(true) const loading = ref(true)
const error = ref('') const error = ref('')
const showCreateModal = ref(false) const showCreateModal = ref(false)
@@ -76,14 +78,16 @@ function buildProviderAnalyticsPath() {
async function fetchStories() { async function fetchStories() {
try { try {
const [storyList, analytics, ops] = await Promise.all([ const [storyList, analytics, ops, activeSession] = await Promise.all([
api.get<StoryItem[]>('/api/stories'), api.get<StoryItem[]>('/api/stories'),
api.get<GenerationProviderAnalytics>(buildProviderAnalyticsPath()), api.get<GenerationProviderAnalytics>(buildProviderAnalyticsPath()),
api.get<GenerationOpsSummary>('/api/generations/ops-summary'), api.get<GenerationOpsSummary>('/api/generations/ops-summary'),
api.get<VoiceSessionSummary | null>('/api/voice-sessions/active').catch(() => null),
]) ])
stories.value = storyList stories.value = storyList
providerAnalytics.value = analytics providerAnalytics.value = analytics
opsSummary.value = ops opsSummary.value = ops
activeVoiceSession.value = activeSession
} catch (e) { } catch (e) {
error.value = e instanceof Error ? e.message : '加载失败' error.value = e instanceof Error ? e.message : '加载失败'
} finally { } finally {
@@ -112,6 +116,10 @@ function goToCreate() {
showCreateModal.value = true showCreateModal.value = true
} }
// Navigates to the voice studio page.
function goToVoiceStudio() {
  const voiceStudioPath = '/voice-studio'
  router.push(voiceStudioPath)
}
function getStoryLink(story: StoryItem) { function getStoryLink(story: StoryItem) {
return story.mode === 'storybook' ? `/storybook/view/${story.id}` : `/story/${story.id}` return story.mode === 'storybook' ? `/storybook/view/${story.id}` : `/story/${story.id}`
} }
@@ -200,6 +208,27 @@ watch([selectedWindow, selectedCapability], () => {
</div> </div>
<template v-else> <template v-else>
<BaseCard
v-if="activeVoiceSession"
class="mb-8 border border-purple-100 bg-purple-50/60"
padding="lg"
>
<div class="flex flex-col gap-4 lg:flex-row lg:items-center lg:justify-between">
<div>
<h2 class="text-xl font-bold text-gray-800">语音共创正在进行中</h2>
<p class="mt-2 text-sm leading-6 text-gray-600">
最近的语音共创会话仍可继续
{{ activeVoiceSession.working_title || '未命名语音会话' }}
当前状态 {{ activeVoiceSession.status }}已完成 {{ activeVoiceSession.total_turns }}
</p>
</div>
<BaseButton @click="goToVoiceStudio">
<SparklesIcon class="h-5 w-5 mr-2" />
继续语音共创
</BaseButton>
</div>
</BaseCard>
<BaseCard class="mb-8" padding="lg"> <BaseCard class="mb-8" padding="lg">
<div class="grid grid-cols-2 lg:grid-cols-4 gap-4"> <div class="grid grid-cols-2 lg:grid-cols-4 gap-4">
<div class="text-center px-4 py-2"> <div class="text-center px-4 py-2">

View File

@@ -47,6 +47,7 @@ const profiles = ref<ChildProfile[]>([])
const universes = ref<StoryUniverse[]>([]) const universes = ref<StoryUniverse[]>([])
const selectedProfileId = ref('') const selectedProfileId = ref('')
const selectedUniverseId = ref('') const selectedUniverseId = ref('')
const sessionFilter = ref<'active' | 'recent'>('active')
const textTurnInput = ref('') const textTurnInput = ref('')
const uploadTranscriptHint = ref('') const uploadTranscriptHint = ref('')
const loadingSessions = ref(false) const loadingSessions = ref(false)
@@ -60,12 +61,14 @@ const recordingDurationMs = ref(0)
const error = ref('') const error = ref('')
const mediaError = ref('') const mediaError = ref('')
const recorderSupported = computed(() => typeof window !== 'undefined' && 'MediaRecorder' in window) const recorderSupported = computed(() => typeof window !== 'undefined' && 'MediaRecorder' in window)
const sessionPollIntervalMs = 1500
let mediaRecorder: MediaRecorder | null = null let mediaRecorder: MediaRecorder | null = null
let mediaStream: MediaStream | null = null let mediaStream: MediaStream | null = null
let recordingChunks: Blob[] = [] let recordingChunks: Blob[] = []
let recordingTimer: number | null = null let recordingTimer: number | null = null
let recordingStartedAt = 0 let recordingStartedAt = 0
let sessionPollTimer: number | null = null
const recordedBlob = ref<Blob | null>(null) const recordedBlob = ref<Blob | null>(null)
const recordedAudioUrl = ref<string | null>(null) const recordedAudioUrl = ref<string | null>(null)
@@ -78,6 +81,22 @@ const universeOptions = computed(() =>
) )
const activeTurnList = computed(() => activeSession.value?.recent_turns ?? []) const activeTurnList = computed(() => activeSession.value?.recent_turns ?? [])
// Help text for the upload panel, chosen from the active session's
// `transcription_mode_hint`; unknown or missing hints get the demo-mode text.
const transcriptionModeDescription = computed(() => {
  const mode = activeSession.value?.transcription_mode_hint
  if (mode === 'openai') {
    return '当前会尝试使用 OpenAI 语音转写,转写提示可以留空。'
  }
  if (mode === 'disabled') {
    return '当前环境禁用了真实语音转写,请先使用文本共创或填写开发转写提示。'
  }
  return '当前默认是 demo 转写模式。若本地未接真实 ASR可在下方填写转写提示辅助开发验证。'
})
// True while the active session is processing a turn, or while its latest
// turn is still in the `received` / `transcribing` stage.
const isSessionProcessing = computed(() => {
  const session = activeSession.value
  if (session?.status === 'processing_turn') {
    return true
  }
  const lastTurnStatus = session?.last_turn_status
  return lastTurnStatus === 'received' || lastTurnStatus === 'transcribing'
})
function formatSessionStatus(status: string) { function formatSessionStatus(status: string) {
switch (status) { switch (status) {
@@ -182,8 +201,13 @@ async function loadSessions() {
if (!userStore.user) return if (!userStore.user) return
loadingSessions.value = true loadingSessions.value = true
try { try {
sessions.value = await api.get<VoiceSessionSummary[]>('/api/voice-sessions?limit=8') const params = new URLSearchParams({
if (!activeSession.value) { limit: '8',
active_first: 'true',
active_only: sessionFilter.value === 'active' ? 'true' : 'false',
})
sessions.value = await api.get<VoiceSessionSummary[]>(`/api/voice-sessions?${params.toString()}`)
if (!activeSession.value && sessionFilter.value === 'recent') {
const resumable = sessions.value.find((item) => item.can_continue) const resumable = sessions.value.find((item) => item.can_continue)
if (resumable) { if (resumable) {
await loadSessionDetail(resumable.id) await loadSessionDetail(resumable.id)
@@ -196,6 +220,18 @@ async function loadSessions() {
} }
} }
// Bootstrap helper: asks the backend for the latest active session and, when
// there is one, loads its detail. All failures are swallowed on purpose.
async function loadLatestActiveSession() {
  if (!userStore.user) return
  try {
    const latest = await api.get<VoiceSessionSummary | null>('/api/voice-sessions/active')
    if (!latest) return
    await loadSessionDetail(latest.id)
  } catch {
    // Ignore active-session bootstrap failures and fall back to normal listing.
  }
}
async function loadSessionDetail(sessionId: string) { async function loadSessionDetail(sessionId: string) {
loadingSessionDetail.value = true loadingSessionDetail.value = true
error.value = '' error.value = ''
@@ -208,6 +244,23 @@ async function loadSessionDetail(sessionId: string) {
} }
} }
// Cancels the polling interval, if one is running, and forgets its handle.
function stopSessionPolling() {
  if (!sessionPollTimer) return
  window.clearInterval(sessionPollTimer)
  sessionPollTimer = null
}
// Starts polling the active session (detail + list) every
// `sessionPollIntervalMs`. No-op when there is no active session or a timer
// is already running.
function startSessionPolling() {
  if (!activeSession.value?.id || sessionPollTimer) return

  // Guards against overlapping refreshes: when a round trip takes longer than
  // the interval, ticks are skipped instead of stacking up requests whose
  // responses could arrive out of order and overwrite newer state.
  let refreshInFlight = false

  sessionPollTimer = window.setInterval(() => {
    const sessionId = activeSession.value?.id
    if (!sessionId || refreshInFlight) return

    refreshInFlight = true
    // allSettled never rejects, so the guard is always released and a failed
    // refresh cannot surface as an unhandled rejection.
    void Promise.allSettled([loadSessionDetail(sessionId), loadSessions()]).then(() => {
      refreshInFlight = false
    })
  }, sessionPollIntervalMs)
}
async function createSession() { async function createSession() {
creatingSession.value = true creatingSession.value = true
error.value = '' error.value = ''
@@ -217,6 +270,7 @@ async function createSession() {
universe_id: selectedUniverseId.value || null, universe_id: selectedUniverseId.value || null,
target_mode: 'story', target_mode: 'story',
}) })
sessionFilter.value = 'active'
await loadSessions() await loadSessions()
await loadSessionDetail(session.id) await loadSessionDetail(session.id)
textTurnInput.value = '' textTurnInput.value = ''
@@ -319,6 +373,39 @@ async function finalizeSession() {
} }
} }
// Asks the backend to retry a failed turn, then refreshes the view for the
// newly accepted turn. Errors are surfaced through `error`.
async function retryFailedTurn(turnId: string) {
  const session = activeSession.value
  if (!session) return

  sendingTurn.value = true
  error.value = ''
  try {
    const retryUrl = `/api/voice-sessions/${session.id}/turns/${turnId}/retry`
    const accepted = await api.post<VoiceTurnAcceptedResponse>(retryUrl)
    await refreshAfterTurn(accepted.session_id, accepted.turn_id)
  } catch (err) {
    if (err instanceof Error) {
      error.value = err.message
    } else {
      error.value = '重试本轮故事失败'
    }
  } finally {
    sendingTurn.value = false
  }
}
// Requests a new assistant-audio synthesis for one turn, then reloads the
// session detail and the session list. Errors are surfaced through `error`.
async function retryAssistantAudio(turnId: string) {
  if (!activeSession.value) return

  sendingTurn.value = true
  error.value = ''
  try {
    const retryAudioUrl = `/api/voice-sessions/${activeSession.value.id}/turns/${turnId}/retry-audio`
    await api.post<VoiceTurnSummary>(retryAudioUrl)
    await loadSessionDetail(activeSession.value.id)
    await loadSessions()
  } catch (err) {
    if (err instanceof Error) {
      error.value = err.message
    } else {
      error.value = '补发语音失败'
    }
  } finally {
    sendingTurn.value = false
  }
}
async function abandonSession() { async function abandonSession() {
if (!activeSession.value) return if (!activeSession.value) return
abandoning.value = true abandoning.value = true
@@ -404,16 +491,34 @@ watch(selectedProfileId, (newId) => {
} }
}) })
// Reload the session list whenever the filter tab changes.
watch(sessionFilter, () => {
  void loadSessions()
})

// Poll only while the active session is being processed; `immediate` applies
// the current state as soon as the watcher is registered.
watch(
  () => isSessionProcessing.value,
  (processing) => {
    if (!processing) {
      stopSessionPolling()
      return
    }
    startSessionPolling()
  },
  { immediate: true },
)
onMounted(async () => { onMounted(async () => {
if (!userStore.user) { if (!userStore.user) {
await userStore.fetchSession() await userStore.fetchSession()
} }
if (!userStore.user) return if (!userStore.user) return
await fetchProfiles() await fetchProfiles()
await loadLatestActiveSession()
await loadSessions() await loadSessions()
}) })
onBeforeUnmount(() => { onBeforeUnmount(() => {
stopSessionPolling()
if (recording.value) { if (recording.value) {
stopRecording() stopRecording()
} }
@@ -488,6 +593,29 @@ onBeforeUnmount(() => {
<span class="text-xs text-gray-400">{{ sessions.length }} </span> <span class="text-xs text-gray-400">{{ sessions.length }} </span>
</div> </div>
<div class="mt-4 flex gap-2">
<button
type="button"
class="rounded-lg border px-3 py-1.5 text-sm transition-colors"
:class="sessionFilter === 'active'
? 'border-purple-600 bg-purple-600 text-white'
: 'border-gray-200 bg-white text-gray-600 hover:border-gray-400'"
@click="sessionFilter = 'active'"
>
活跃会话
</button>
<button
type="button"
class="rounded-lg border px-3 py-1.5 text-sm transition-colors"
:class="sessionFilter === 'recent'
? 'border-purple-600 bg-purple-600 text-white'
: 'border-gray-200 bg-white text-gray-600 hover:border-gray-400'"
@click="sessionFilter = 'recent'"
>
最近全部
</button>
</div>
<div v-if="loadingSessions" class="py-8"> <div v-if="loadingSessions" class="py-8">
<LoadingSpinner text="加载会话中..." /> <LoadingSpinner text="加载会话中..." />
</div> </div>
@@ -558,6 +686,13 @@ onBeforeUnmount(() => {
</p> </p>
</div> </div>
<div
v-if="isSessionProcessing"
class="rounded-2xl border border-amber-100 bg-amber-50 px-4 py-3 text-sm text-amber-700"
>
当前会话仍在处理中页面会自动轮询刷新
</div>
<div class="flex flex-wrap gap-3"> <div class="flex flex-wrap gap-3">
<BaseButton <BaseButton
variant="secondary" variant="secondary"
@@ -614,7 +749,7 @@ onBeforeUnmount(() => {
<span class="text-xs text-gray-400">已支持上传音频 turn</span> <span class="text-xs text-gray-400">已支持上传音频 turn</span>
</div> </div>
<p class="mt-2 text-sm text-gray-500"> <p class="mt-2 text-sm text-gray-500">
当前默认是 demo 转写模式若本地未接真实 ASR可在下方填写转写提示辅助开发验证 {{ transcriptionModeDescription }}
</p> </p>
<div class="mt-4 flex flex-wrap gap-3"> <div class="mt-4 flex flex-wrap gap-3">
@@ -714,6 +849,28 @@ onBeforeUnmount(() => {
<div v-if="turn.assistant_audio_url" class="mt-3"> <div v-if="turn.assistant_audio_url" class="mt-3">
<audio class="w-full" :src="turn.assistant_audio_url" controls></audio> <audio class="w-full" :src="turn.assistant_audio_url" controls></audio>
</div> </div>
<div class="mt-4 flex flex-wrap gap-3">
<BaseButton
v-if="turn.status === 'failed'"
size="sm"
variant="secondary"
@click="retryFailedTurn(turn.id)"
:disabled="sendingTurn || !activeSession?.can_continue"
>
<ArrowPathIcon class="h-4 w-4" />
重试本轮
</BaseButton>
<BaseButton
v-if="turn.assistant_text && !turn.assistant_audio_ready"
size="sm"
variant="ghost"
@click="retryAssistantAudio(turn.id)"
:disabled="sendingTurn"
>
<ArrowPathIcon class="h-4 w-4" />
补发语音
</BaseButton>
</div>
<div v-if="turn.error_message" class="mt-3 text-sm text-rose-600"> <div v-if="turn.error_message" class="mt-3 text-sm text-rose-600">
{{ turn.error_message }} {{ turn.error_message }}
</div> </div>