From 9f74a9327468e68cdfddd825878967036bb1ca84 Mon Sep 17 00:00:00 2001 From: torin Date: Tue, 21 Apr 2026 14:19:51 +0800 Subject: [PATCH] feat: refine voice studio attention workflow --- backend/app/api/voice_sessions.py | 8 + backend/app/schemas/voice_session_schemas.py | 5 + backend/app/services/voice_session_service.py | 98 ++- backend/tests/test_voice_sessions.py | 143 ++++ frontend/src/types/voiceSession.ts | 5 + frontend/src/views/MyStories.vue | 127 +++- frontend/src/views/VoiceStudio.vue | 687 +++++++++++++++++- 7 files changed, 1025 insertions(+), 48 deletions(-) diff --git a/backend/app/api/voice_sessions.py b/backend/app/api/voice_sessions.py index 561c359..ae29839 100644 --- a/backend/app/api/voice_sessions.py +++ b/backend/app/api/voice_sessions.py @@ -1,5 +1,7 @@ """Voice co-creation session APIs.""" +from typing import Literal + from fastapi import ( APIRouter, Depends, @@ -82,6 +84,10 @@ async def list_voice_sessions( le=settings.voice_session_max_list_limit, ), active_only: bool = Query(default=False), + needs_attention: bool = Query(default=False), + attention_reason: ( + Literal["pending_confirmation", "safety_intervention", "failed_turn"] | None + ) = Query(default=None), active_first: bool = Query(default=True), user: User = Depends(require_user), db: AsyncSession = Depends(get_db), @@ -92,6 +98,8 @@ async def list_voice_sessions( db, limit=limit, active_only=active_only, + needs_attention=needs_attention, + attention_reason=attention_reason, active_first=active_first, ) diff --git a/backend/app/schemas/voice_session_schemas.py b/backend/app/schemas/voice_session_schemas.py index 28f0ba5..110a657 100644 --- a/backend/app/schemas/voice_session_schemas.py +++ b/backend/app/schemas/voice_session_schemas.py @@ -121,6 +121,7 @@ class VoiceSessionSummaryResponse(BaseModel): latest_safety_message: str | None = None latest_assistant_audio_ready: bool = False last_turn_status: str | None = None + attention_reasons: list[str] = Field(default_factory=list) transcription_mode_hint: str | None = None can_continue: bool = False can_finalize: bool = False @@ -149,6 +150,10 @@ class VoiceSessionAnalyticsResponse(BaseModel): window_days: int | None = None total_sessions: int = 0 + attention_sessions: int = 0 + confirmation_attention_sessions: int = 0 + safety_attention_sessions: int = 0 + failed_attention_sessions: int = 0 active_sessions: int = 0 finalized_sessions: int = 0 abandoned_sessions: int = 0 diff --git a/backend/app/services/voice_session_service.py b/backend/app/services/voice_session_service.py index 2b879d4..b047387 100644 --- a/backend/app/services/voice_session_service.py +++ b/backend/app/services/voice_session_service.py @@ -388,6 +388,12 @@ def _session_to_summary( story_patch=latest_turn.story_patch or {}, ) latest_safety_state = _resolve_turn_safety_state(latest_turn.story_patch or {}) + attention_reasons = _build_session_attention_reasons( + latest_requires_confirmation=latest_confirmation_state["requires_confirmation"], + latest_safety_flags=latest_safety_state["safety_flags"], + last_turn_status=latest_turn.status if latest_turn else None, + last_error=session.last_error, + ) return VoiceSessionSummaryResponse( id=session.id, @@ -413,12 +419,55 @@ def _session_to_summary( session_audio_exists(latest_turn.assistant_audio_path) if latest_turn else False ), last_turn_status=latest_turn.status if latest_turn else None, + attention_reasons=attention_reasons, transcription_mode_hint=settings.voice_transcription_mode, can_continue=_session_can_continue(session), can_finalize=_can_finalize_with_latest_turn(session, latest_turn), last_error=session.last_error, created_at=session.created_at, updated_at=session.updated_at, +) + + +def _build_session_attention_reasons( + *, + latest_requires_confirmation: bool, + latest_safety_flags: list[str] | None, + last_turn_status: str | None, + last_error: str | None, +) -> list[str]: + reasons: list[str] = [] + if latest_requires_confirmation: + reasons.append("pending_confirmation") + if latest_safety_flags: + reasons.append("safety_intervention") + if last_turn_status == "failed" or last_error: + reasons.append("failed_turn") + return reasons + + +def _session_summary_needs_attention(summary: VoiceSessionSummaryResponse) -> bool: + return bool(summary.attention_reasons) + + +def _session_summary_matches_attention_reason( + summary: VoiceSessionSummaryResponse, + attention_reason: str | None, +) -> bool: + if attention_reason is None: + return True + return attention_reason in summary.attention_reasons + + +async def _build_session_summary( + db: AsyncSession, + session: VoiceSession, +) -> VoiceSessionSummaryResponse: + latest_turn = await _get_latest_turn(db, session_id=session.id) + return _session_to_summary( + session, + latest_turn=latest_turn, + total_turns=session.current_turn_index, ) @@ -1082,6 +1131,8 @@ async def list_voice_sessions_service( *, limit: int | None = None, active_only: bool = False, + needs_attention: bool = False, + attention_reason: str | None = None, active_first: bool = False, ) -> list[VoiceSessionSummaryResponse]: resolved_limit = limit or settings.voice_session_default_list_limit @@ -1102,19 +1153,20 @@ async def list_voice_sessions_service( ) else: query = query.order_by(desc(VoiceSession.updated_at), desc(VoiceSession.created_at)) - query = query.limit(resolved_limit) + if not needs_attention and attention_reason is None: + query = query.limit(resolved_limit) sessions = (await db.execute(query)).scalars().all() summaries: list[VoiceSessionSummaryResponse] = [] for session in sessions: - latest_turn = await _get_latest_turn(db, session_id=session.id) - summaries.append( - _session_to_summary( - session, - latest_turn=latest_turn, - total_turns=session.current_turn_index, - ) - ) + summary = await _build_session_summary(db, session) + if needs_attention and not _session_summary_needs_attention(summary): + continue + if not _session_summary_matches_attention_reason(summary, attention_reason): + continue + summaries.append(summary) + if (needs_attention or attention_reason is not None) and len(summaries) >= resolved_limit: + break return summaries @@ -1134,12 +1186,7 @@ async def get_latest_active_voice_session_service( session = (await db.execute(query)).scalar_one_or_none() if session is None: return None - latest_turn = await _get_latest_turn(db, session_id=session.id) - return _session_to_summary( - session, - latest_turn=latest_turn, - total_turns=session.current_turn_index, - ) + return await _build_session_summary(db, session) async def get_voice_session_analytics_service( @@ -1172,8 +1219,25 @@ async def get_voice_session_analytics_service( sessions = (await db.execute(session_query)).scalars().all() turns = (await db.execute(turn_query)).scalars().all() events = (await db.execute(event_query)).scalars().all() + session_summaries = [await _build_session_summary(db, session) for session in sessions] total_sessions = len(sessions) + attention_sessions = sum( + 1 for summary in session_summaries if _session_summary_needs_attention(summary) + ) + confirmation_attention_sessions = sum( + 1 + for summary in session_summaries + if "pending_confirmation" in summary.attention_reasons + ) + safety_attention_sessions = sum( + 1 + for summary in session_summaries + if "safety_intervention" in summary.attention_reasons + ) + failed_attention_sessions = sum( + 1 for summary in session_summaries if "failed_turn" in summary.attention_reasons + ) active_sessions = sum( 1 for session in sessions if session.status in CONTINUABLE_SESSION_STATUSES ) @@ -1205,6 +1269,10 @@ async def get_voice_session_analytics_service( return VoiceSessionAnalyticsResponse( window_days=days, total_sessions=total_sessions, + attention_sessions=attention_sessions, + confirmation_attention_sessions=confirmation_attention_sessions, + safety_attention_sessions=safety_attention_sessions, + failed_attention_sessions=failed_attention_sessions, active_sessions=active_sessions, finalized_sessions=finalized_sessions, abandoned_sessions=abandoned_sessions, diff --git a/backend/tests/test_voice_sessions.py b/backend/tests/test_voice_sessions.py index 19f0e75..e752850 100644 --- a/backend/tests/test_voice_sessions.py +++ b/backend/tests/test_voice_sessions.py @@ -681,6 +681,149 @@ async def test_voice_session_analytics_summarize_failures_and_confirmations( app.dependency_overrides.clear() +async def test_voice_session_attention_filter_and_analytics_count( + db_session, + auth_token, +): + async def override_get_db(): + yield db_session + + app.dependency_overrides[get_db] = override_get_db + + with ( + patch( + "app.services.voice_session_service.generate_story_content", + new_callable=AsyncMock, + ) as mock_generate, + patch( + "app.services.voice_session_service.text_to_speech", + new_callable=AsyncMock, + ) as mock_tts, + patch( + "app.services.voice_session_service.transcribe_voice_audio", + new_callable=AsyncMock, + ) as mock_transcribe, + ): + mock_generate.side_effect = [ + StoryOutput( + mode="generated", + title="正常故事", + story_text="第一段温暖故事。", + cover_prompt_suggestion="normal cover", + ), + RuntimeError("provider down"), + ] + mock_tts.side_effect = [ + b"normal-audio", + b"confirmation-audio", + b"safety-audio", + ] + mock_transcribe.return_value = VoiceTranscriptionResult( + transcript_text="我想听一个会发光的小恐龙故事", + confidence=0.41, + provider="openai", + ) + + transport = ASGITransport(app=app) + try: + async with AsyncClient(transport=transport, base_url="http://test") as client: + client.cookies.set("access_token", auth_token) + + response = await client.post("/api/voice-sessions", json={}) + normal_session_id = response.json()["id"] + response = await client.post( + f"/api/voice-sessions/{normal_session_id}/turns/fallback", + json={"transcript_text": "先讲一个温暖的普通故事"}, + ) + assert response.status_code == 202 + + response = await client.post("/api/voice-sessions", json={}) + failed_session_id = response.json()["id"] + response = await client.post( + f"/api/voice-sessions/{failed_session_id}/turns/fallback", + json={"transcript_text": "这轮会触发 provider 异常"}, + ) + assert response.status_code == 202 + + response = await client.post("/api/voice-sessions", json={}) + confirmation_session_id = response.json()["id"] + response = await client.post( + f"/api/voice-sessions/{confirmation_session_id}/turns", + files={ + "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"), + }, + ) + assert response.status_code == 202 + + response = await client.post("/api/voice-sessions", json={}) + safety_session_id = response.json()["id"] + response = await client.post( + f"/api/voice-sessions/{safety_session_id}/turns/fallback", + json={"transcript_text": "我想听一个拿着炸弹互相打的故事"}, + ) + assert response.status_code == 202 + + response = await client.get( + "/api/voice-sessions?needs_attention=true&limit=8" + ) + assert response.status_code == 200 + attention_sessions = response.json() + attention_session_ids = {item["id"] for item in attention_sessions} + assert attention_session_ids == { + failed_session_id, + confirmation_session_id, + safety_session_id, + } + assert normal_session_id not in attention_session_ids + attention_reason_sets = { + item["id"]: set(item["attention_reasons"]) for item in attention_sessions + } + assert attention_reason_sets[confirmation_session_id] == { + "pending_confirmation" + } + assert attention_reason_sets[safety_session_id] == { + "safety_intervention" + } + assert attention_reason_sets[failed_session_id] == {"failed_turn"} + + response = await client.get( + "/api/voice-sessions?needs_attention=true&attention_reason=pending_confirmation" + ) + assert response.status_code == 200 + confirmation_sessions = response.json() + assert [item["id"] for item in confirmation_sessions] == [ + confirmation_session_id + ] + + response = await client.get( + "/api/voice-sessions?needs_attention=true&attention_reason=safety_intervention" + ) + assert response.status_code == 200 + safety_sessions = response.json() + assert [item["id"] for item in safety_sessions] == [safety_session_id] + + response = await client.get( + "/api/voice-sessions?needs_attention=true&attention_reason=failed_turn" + ) + assert response.status_code == 200 + failed_sessions = response.json() + assert [item["id"] for item in failed_sessions] == [failed_session_id] + + response = await client.get("/api/voice-sessions/analytics?days=30") + assert response.status_code == 200 + analytics = response.json() + assert analytics["total_sessions"] == 4 + assert analytics["attention_sessions"] == 3 + assert analytics["confirmation_attention_sessions"] == 1 + assert analytics["safety_attention_sessions"] == 1 + assert analytics["failed_attention_sessions"] == 1 + assert analytics["failed_turns"] >= 1 + assert analytics["low_confidence_turns"] >= 1 + assert analytics["safety_interventions"] >= 1 + finally: + app.dependency_overrides.clear() + + async def test_voice_session_list_orders_recent_sessions_first( db_session, auth_token, diff --git a/frontend/src/types/voiceSession.ts b/frontend/src/types/voiceSession.ts index 860911c..2288116 100644 --- a/frontend/src/types/voiceSession.ts +++ b/frontend/src/types/voiceSession.ts @@ -59,6 +59,7 @@ export interface VoiceSessionSummary { latest_safety_message: string | null latest_assistant_audio_ready: boolean last_turn_status: string | null + attention_reasons: string[] transcription_mode_hint: string | null can_continue: boolean can_finalize: boolean @@ -81,6 +82,10 @@ export interface VoiceTurnAcceptedResponse { export interface VoiceSessionAnalytics { window_days: number | null total_sessions: number + attention_sessions: number + confirmation_attention_sessions: number + safety_attention_sessions: number + failed_attention_sessions: number active_sessions: number finalized_sessions: number abandoned_sessions: number diff --git a/frontend/src/views/MyStories.vue b/frontend/src/views/MyStories.vue index 58ac96f..d2366bc 100644 --- a/frontend/src/views/MyStories.vue +++ b/frontend/src/views/MyStories.vue @@ -37,6 +37,9 @@ interface StoryItem { last_error: string | null } +type VoiceAttentionReason = 'pending_confirmation' | 'safety_intervention' | 'failed_turn' +type VoiceStudioFocusTarget = 'confirmation' | 'safety' | 'failed' | 'text' + const router = useRouter() const stories = ref([]) const providerAnalytics = ref(null) @@ -47,6 +50,7 @@ const loading = ref(true) const error = ref('') const showCreateModal = ref(false) const selectedWindow = ref<'7' | '30' | 'all'>('30') +const selectedVoiceWindow = ref<'7' | '30' | 'all'>('30') const selectedCapability = ref<'all' | 'text' | 'image' | 'tts' | 'storybook'>('all') const readableCount = computed(() => @@ -72,6 +76,9 @@ const voiceFinalizeRate = computed(() => { if (!voiceAnalytics.value) return null return Math.round(voiceAnalytics.value.finalize_conversion_rate * 100) }) +const voiceAnalyticsWindowLabel = computed(() => + formatWindowLabel(voiceAnalytics.value?.window_days ?? null), +) function buildProviderAnalyticsPath() { const params = new URLSearchParams() @@ -85,6 +92,13 @@ function buildProviderAnalyticsPath() { return `/api/generations/provider-analytics${query ? `?${query}` : ''}` } +function buildVoiceAnalyticsPath() { + if (selectedVoiceWindow.value === 'all') { + return '/api/voice-sessions/analytics' + } + return `/api/voice-sessions/analytics?days=${selectedVoiceWindow.value}` +} + async function fetchStories() { try { const [storyList, analytics, ops, activeSession, voiceOverview] = await Promise.all([ @@ -92,7 +106,7 @@ async function fetchStories() { api.get(buildProviderAnalyticsPath()), api.get('/api/generations/ops-summary'), api.get('/api/voice-sessions/active').catch(() => null), - api.get('/api/voice-sessions/analytics?days=30').catch(() => null), + api.get(buildVoiceAnalyticsPath()).catch(() => null), ]) stories.value = storyList providerAnalytics.value = analytics @@ -123,12 +137,66 @@ function formatDate(dateStr: string) { }) } +function formatWindowLabel(windowDays: number | null | undefined) { + if (typeof windowDays === 'number') { + return `最近 ${windowDays} 天` + } + return '全部历史' +} + function goToCreate() { showCreateModal.value = true } -function goToVoiceStudio() { - router.push('/voice-studio') +function goToVoiceStudio(options?: { + reason?: VoiceAttentionReason + sessionId?: string + focus?: VoiceStudioFocusTarget +}) { + const query: Record = {} + if (options?.reason) { + query.filter = 'attention' + query.reason = options.reason + } + if (options?.sessionId) { + query.session = options.sessionId + } + if (options?.focus) { + query.focus = options.focus + } + router.push({ path: '/voice-studio', query }) +} + +function continueActiveVoiceSession() { + if (!activeVoiceSession.value) { + goToVoiceStudio() + return + } + if (activeVoiceSession.value.latest_requires_confirmation) { + goToVoiceStudio({ + reason: 'pending_confirmation', + sessionId: activeVoiceSession.value.id, + focus: 'confirmation', + }) + return + } + if (activeVoiceSession.value.latest_safety_message) { + goToVoiceStudio({ + reason: 'safety_intervention', + sessionId: activeVoiceSession.value.id, + focus: 'safety', + }) + return + } + if (activeVoiceSession.value.attention_reasons.includes('failed_turn')) { + goToVoiceStudio({ + reason: 'failed_turn', + sessionId: activeVoiceSession.value.id, + focus: 'failed', + }) + return + } + goToVoiceStudio({ sessionId: activeVoiceSession.value.id }) } function getStoryLink(story: StoryItem) { @@ -160,6 +228,10 @@ function setWindow(value: '7' | '30' | 'all') { selectedWindow.value = value } +function setVoiceWindow(value: '7' | '30' | 'all') { + selectedVoiceWindow.value = value +} + function setCapability(value: 'all' | 'text' | 'image' | 'tts' | 'storybook') { selectedCapability.value = value } @@ -173,7 +245,7 @@ onMounted(() => { } }) -watch([selectedWindow, selectedCapability], () => { +watch([selectedWindow, selectedCapability, selectedVoiceWindow], () => { void fetchStories() }) @@ -245,7 +317,7 @@ watch([selectedWindow, selectedCapability], () => { 最近一轮触发了儿童内容安全兜底,建议回到工作台查看详细记录。

- + 继续语音共创 @@ -261,7 +333,7 @@ watch([selectedWindow, selectedCapability], () => {

语音共创运营摘要

- 最近 {{ voiceAnalytics.window_days ?? 30 }} 天,你的语音共创已经累计 + {{ voiceAnalyticsWindowLabel }},你的语音共创已经累计 {{ voiceAnalytics.total_sessions }} 个会话、{{ voiceAnalytics.total_turns }} 个 turn。

{ 低置信度确认 {{ voiceAnalytics.low_confidence_turns }} 次, 安全介入 {{ voiceAnalytics.safety_interventions }} 次。

+

+ 当前仍有 {{ voiceAnalytics.attention_sessions }} 个语音会话建议优先回到工作台处理: + 待确认 {{ voiceAnalytics.confirmation_attention_sessions }} 个, + 安全介入 {{ voiceAnalytics.safety_attention_sessions }} 个, + 失败待处理 {{ voiceAnalytics.failed_attention_sessions }} 个。 +

+
+ + + +
+
+ + + +
diff --git a/frontend/src/views/VoiceStudio.vue b/frontend/src/views/VoiceStudio.vue index 56972f2..c487ffa 100644 --- a/frontend/src/views/VoiceStudio.vue +++ b/frontend/src/views/VoiceStudio.vue @@ -1,6 +1,6 @@ @@ -655,49 +1025,113 @@ onBeforeUnmount(() => {

最近会话

-

支持恢复最近还在等待下一轮的 session。

+

优先把需要家长确认或安全回看的 session 先拎出来处理。

- {{ sessions.length }} 个 + {{ filteredSessions.length }} 个
-
+
+
+
+ + + + +
+
-
+
{{ formatDate(session.updated_at) }} @@ -732,13 +1194,64 @@ onBeforeUnmount(() => {
+ +
+
+ 已处理上一条{{ autoAdvanceNotice.reasonLabel }}会话“{{ autoAdvanceNotice.fromTitle }}”, + 已自动切换到下一条:“{{ autoAdvanceNotice.toTitle }}”。 +
+ +
+
+

语音共创观测

-

最近 {{ voiceAnalytics.window_days ?? 30 }} 天的会话质量概览。

+

{{ analyticsWindowLabel }} 的会话质量概览。

+
+ + + +
Turn 成功率
@@ -759,8 +1272,46 @@ onBeforeUnmount(() => {

ASR 失败 {{ voiceAnalytics.asr_failures }} 次,TTS 失败 {{ voiceAnalytics.tts_failures }} 次; - 当前共有 {{ voiceAnalytics.total_sessions }} 个会话,已完成 {{ voiceAnalytics.finalized_sessions }} 个。 + 当前共有 {{ voiceAnalytics.total_sessions }} 个会话,其中 {{ voiceAnalytics.attention_sessions }} 个仍需处理, + 已完成 {{ voiceAnalytics.finalized_sessions }} 个。

+

+ 待确认 {{ voiceAnalytics.confirmation_attention_sessions }} 个, + 安全介入 {{ voiceAnalytics.safety_attention_sessions }} 个, + 失败待处理 {{ voiceAnalytics.failed_attention_sessions }} 个。 +

+
+ + + +
@@ -828,7 +1379,8 @@ onBeforeUnmount(() => {
建议先确认这一轮理解
@@ -838,10 +1390,37 @@ onBeforeUnmount(() => {

{{ activeSession.latest_understanding_summary }}

+
+ + 按这个理解继续 + + + 重说一遍 + + + 改成文本输入 + +
已触发儿童内容安全兜底
@@ -849,6 +1428,59 @@ onBeforeUnmount(() => {

安全标记:{{ activeSession.latest_safety_flags.join(' / ') }}

+
+ + 用文本继续改写 + + + 建议换一种更温和的表达再继续。 + +
+
+ +
+
有一轮失败待处理
+

+ + +

+
+ + 重试本轮 + + + 改成文本重发 + +
{
-
+

文本共创回合

最稳的 fallback 路径 @@ -980,6 +1612,7 @@ onBeforeUnmount(() => {