From fab2094e34911126b7b6b9099f8c65113854afcc Mon Sep 17 00:00:00 2001
From: torin <yuyan@duck.com>
Date: Mon, 20 Apr 2026 16:10:15 +0800
Subject: [PATCH] feat: complete voice session safety and confirmation flow

---
 backend/app/api/voice_sessions.py             |  35 ++
 backend/app/schemas/voice_session_schemas.py  |  32 ++
 backend/app/services/voice_session_safety.py  | 135 +++++
 backend/app/services/voice_session_service.py | 533 +++++++++++++++++-
 backend/tests/test_voice_sessions.py          | 315 +++++++++++
 ...co-creation-phase-a-migration-api-draft.md |  23 +
 .../voice-co-creation-phase-a-tech-spec.md    |   4 +
 frontend/src/types/voiceSession.ts            |  24 +
 frontend/src/views/VoiceStudio.vue            | 183 +++++-
 9 files changed, 1256 insertions(+), 28 deletions(-)
 create mode 100644 backend/app/services/voice_session_safety.py

diff --git a/backend/app/api/voice_sessions.py b/backend/app/api/voice_sessions.py
index 2e2a2a0..561c359 100644
--- a/backend/app/api/voice_sessions.py
+++ b/backend/app/api/voice_sessions.py
@@ -19,12 +19,14 @@ from app.db.database import get_db
 from app.db.models import User
 from app.schemas.voice_session_schemas import (
     VoiceSessionAbandonRequest,
+    VoiceSessionAnalyticsResponse,
     VoiceSessionCreateRequest,
     VoiceSessionDetailResponse,
     VoiceSessionFinalizeRequest,
     VoiceSessionFinalizeResponse,
     VoiceSessionSummaryResponse,
     VoiceTurnAcceptedResponse,
+    VoiceTurnConfirmRequest,
     VoiceTurnCreateFallbackRequest,
     VoiceTurnSummaryResponse,
     VoiceTurnUploadAcceptedResponse,
@@ -36,11 +38,13 @@ from app.services.voice_session_service import (
     create_voice_turn_from_upload_service,
     finalize_voice_session_service,
     get_latest_active_voice_session_service,
+    get_voice_session_analytics_service,
     get_voice_session_detail_service,
     get_voice_turn_audio_service,
     get_voice_turn_service,
     get_voice_turn_user_audio_service,
     list_voice_sessions_service,
+    resolve_voice_turn_confirmation_service,
     retry_voice_turn_audio_service,
     retry_voice_turn_service,
 )
@@ -101,6 +105,16 @@ async def get_latest_active_voice_session(
     return await get_latest_active_voice_session_service(user.id, db)
 
 
+@router.get("/voice-sessions/analytics", response_model=VoiceSessionAnalyticsResponse)
+async def get_voice_session_analytics(
+    days: int | None = Query(default=30, ge=1, le=365),
+    user: User = Depends(require_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Return the current user's voice co-creation analytics for the last ``days`` days."""
+    return await get_voice_session_analytics_service(user.id, db, days=days)
+
+
 @router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse)
 async def get_voice_session(
     session_id: str,
@@ -192,6 +206,27 @@ async def retry_voice_turn(
     return await retry_voice_turn_service(session_id, turn_id, user.id, db)
 
 
+@router.post(
+    "/voice-sessions/{session_id}/turns/{turn_id}/confirm",
+    response_model=VoiceTurnSummaryResponse,
+)
+async def resolve_voice_turn_confirmation(
+    session_id: str,
+    turn_id: str,
+    request: VoiceTurnConfirmRequest,
+    user: User = Depends(require_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Apply the action in ``request`` to one turn's confirmation and return the turn summary."""
+    return await resolve_voice_turn_confirmation_service(
+        session_id,
+        turn_id,
+        request,
+        user.id,
+        db,
+    )
+
+
 @router.get("/voice-sessions/{session_id}/turns/{turn_id}/audio")
 async def get_voice_turn_audio(
     session_id: str,
diff --git a/backend/app/schemas/voice_session_schemas.py b/backend/app/schemas/voice_session_schemas.py
index 54e4ef3..28f0ba5 100644
--- a/backend/app/schemas/voice_session_schemas.py
+++ b/backend/app/schemas/voice_session_schemas.py
@@ -42,6 +42,12 @@ class VoiceSessionFinalizeRequest(BaseModel):
     generate_final_audio: bool = False
 
 
+class VoiceTurnConfirmRequest(BaseModel):
+    """Request body that picks how one turn's pending confirmation is resolved."""
+
+    action: Literal["accept", "retry_recording", "switch_to_text"]  # required, no default
+
+
 class VoiceSessionAbandonRequest(BaseModel):
     """Explicitly abandon one in-progress session."""
 
@@ -75,8 +81,12 @@ class VoiceTurnSummaryResponse(BaseModel):
     intent_confidence: float | None = None
     understanding_summary: str | None = None
     requires_confirmation: bool = False
+    confirmation_state: str = "not_needed"
     confirmation_reason: str | None = None
     confirmation_message: str | None = None
+    safety_flags: list[str] = Field(default_factory=list)
+    safety_blocked: bool = False
+    safety_message: str | None = None
     assistant_text: str | None = None
     assistant_audio_ready: bool = False
     assistant_audio_url: str | None = None
@@ -105,7 +115,10 @@ class VoiceSessionSummaryResponse(BaseModel):
     latest_detected_intent: str | None = None
     latest_understanding_summary: str | None = None
     latest_requires_confirmation: bool = False
+    latest_confirmation_state: str | None = None
     latest_confirmation_message: str | None = None
+    latest_safety_flags: list[str] = Field(default_factory=list)
+    latest_safety_message: str | None = None
     latest_assistant_audio_ready: bool = False
     last_turn_status: str | None = None
     transcription_mode_hint: str | None = None
@@ -131,6 +144,25 @@ class VoiceTurnAcceptedResponse(BaseModel):
     status: str
 
 
+class VoiceSessionAnalyticsResponse(BaseModel):
+    """Aggregated voice co-creation analytics for one user; every counter defaults to zero."""
+
+    window_days: int | None = None  # presumably the requested look-back window — confirm
+    total_sessions: int = 0
+    active_sessions: int = 0  # NOTE(review): looks like "status is still continuable" — confirm
+    finalized_sessions: int = 0  # NOTE(review): looks like status == "completed" — confirm
+    abandoned_sessions: int = 0  # NOTE(review): looks like status == "abandoned" — confirm
+    total_turns: int = 0
+    successful_turns: int = 0
+    failed_turns: int = 0  # NOTE(review): looks like turn status == "failed" — confirm
+    asr_failures: int = 0  # NOTE(review): presumably transcription-failure events — confirm
+    tts_failures: int = 0  # NOTE(review): presumably assistant-audio failure events — confirm
+    low_confidence_turns: int = 0
+    safety_interventions: int = 0
+    turn_success_rate: float = 0.0  # NOTE(review): unit (0-1 vs percent) not visible — confirm
+    finalize_conversion_rate: float = 0.0  # NOTE(review): unit not visible here — confirm
+
+
 class VoiceSessionFinalizeResponse(BaseModel):
     """Finalize response after a session is converted into a story."""
 
diff --git a/backend/app/services/voice_session_safety.py b/backend/app/services/voice_session_safety.py
new file mode 100644
index 0000000..f6272ef
--- /dev/null
+++ b/backend/app/services/voice_session_safety.py
@@ -0,0 +1,135 @@
+"""Safety helpers for child-friendly voice co-creation sessions."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+UNSAFE_KEYWORD_GROUPS: dict[str, tuple[str, ...]] = {  # flag name -> substrings that raise it
+    "violence": (
+        "打死",
+        "杀掉",
+        "砍伤",
+        "流很多血",
+        "炸弹",
+        "爆炸",
+        "开枪",
+        "刀子",
+        "互相打",
+    ),
+    "horror": (
+        "鬼屋",
+        "鬼怪",
+        "僵尸",
+        "诅咒",
+        "恶魔",
+        "吃人",
+        "恐怖",
+        "吓死人",
+    ),
+    "danger": (
+        "毒药",
+        "绑架",
+        "自杀",
+        "跳楼",
+        "伤害自己",
+        "把人关起来",
+    ),
+    "adult": (
+        "色情",
+        "裸",
+        "亲热",
+        "不穿衣服",
+    ),
+}
+
+
+@dataclass(frozen=True)
+class VoiceSafetyResult:
+    """Outcome of screening one piece of text against ``UNSAFE_KEYWORD_GROUPS``."""
+
+    is_safe: bool  # True when no keyword group matched
+    flags: list[str]  # names of the matched keyword groups; empty when safe
+    replacement_text: str | None = None  # text to use in place of the unsafe content
+    message: str | None = None  # human-readable note describing the intervention
+
+
+def _collect_safety_flags(text: str) -> list[str]:
+    """Return the unsafe keyword groups found in ``text``, in table order (empty if none)."""
+    # str.split() with no separator drops every run of Unicode whitespace (tabs, newlines,
+    # full-width spaces), so spacing cannot hide a keyword; a None/empty text yields no flags.
+    normalized = "".join((text or "").split())
+    return [
+        flag
+        for flag, keywords in UNSAFE_KEYWORD_GROUPS.items()
+        if any(keyword in normalized for keyword in keywords)
+    ]
+
+
+def _redirect_prefix(flags: list[str]) -> str:
+    """Pick the opening sentence of a redirect; priority is adult > danger/violence > horror."""
+    if "adult" in flags:
+        return "这个方向不适合小朋友的睡前故事。"
+    if not {"danger", "violence"}.isdisjoint(flags):
+        return "这个方向有点太危险了。"
+    horror_only = "horror" in flags
+    return "这个方向有点太吓人了。" if horror_only else "这个方向现在不太适合继续讲下去。"
+
+
+def build_child_safe_redirect(flags: list[str]) -> str:
+    """Compose the gentle redirect text used when a request trips the safety filter."""
+
+    opening = _redirect_prefix(flags)
+    invitation = "我们把它改成温柔、安全、适合小朋友的冒险吧。"
+    examples = "你可以试试说：让小伙伴一起想办法、让事情变得更明亮，或者让新朋友来帮忙。"
+    # Flag-specific opening first, then the invitation, then things the child can say next.
+    return opening + invitation + examples
+
+
+def build_safe_story_fallback(*, premise: str | None = None) -> str:
+    """Build a safe replacement narrative segment for unsafe assistant output."""
+    # The premise (first 12 chars) is the sentence subject; a blank premise, or one that itself
+    # matches an unsafe keyword group, falls back to the neutral default subject.
+    subject = (premise or "").strip()[:12]
+    if not subject or _collect_safety_flags(subject):
+        subject = "小伙伴们"
+    return (
+        f"{subject}决定把眼前的难题变成一次温柔又勇敢的冒险。"
+        "大家先停下来想一想，再一起找到一个善良、安全、让人安心的解决办法，"
+        "故事也朝着明亮的方向继续展开。"
+    )
+
+
+def check_user_transcript_safety(transcript_text: str) -> VoiceSafetyResult:
+    """Check what the user said against the unsafe keyword groups.
+
+    A clean transcript yields a safe result with no flags. Otherwise the result carries the
+    matched groups, and the same redirect text serves as both replacement and message.
+    """
+    matched_groups = _collect_safety_flags(transcript_text)
+    if matched_groups:
+        redirect = build_child_safe_redirect(matched_groups)
+        return VoiceSafetyResult(
+            is_safe=False, flags=matched_groups, replacement_text=redirect, message=redirect
+        )
+
+    return VoiceSafetyResult(is_safe=True, flags=[])
+
+
+def check_assistant_output_safety(
+    assistant_text: str,
+    *,
+    premise: str | None = None,
+) -> VoiceSafetyResult:
+    """Check generated story text against the unsafe keyword groups.
+
+    Flagged text gets a fallback segment built from ``premise`` plus a fixed notice message.
+    """
+    matched_groups = _collect_safety_flags(assistant_text)
+    if matched_groups:
+        return VoiceSafetyResult(
+            is_safe=False,
+            flags=matched_groups,
+            replacement_text=build_safe_story_fallback(premise=premise),
+            message="系统已把不适合孩子的内容改写为更温和安全的版本。",
+        )
+    return VoiceSafetyResult(is_safe=True, flags=[])
diff --git a/backend/app/services/voice_session_service.py b/backend/app/services/voice_session_service.py
index 14ad513..a402c2c 100644
--- a/backend/app/services/voice_session_service.py
+++ b/backend/app/services/voice_session_service.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from typing import Any
 
 from fastapi import HTTPException
@@ -14,12 +14,14 @@ from app.core.logging import get_logger
 from app.db.models import VoiceSession, VoiceSessionEvent, VoiceTurn
 from app.schemas.voice_session_schemas import (
     VoiceSessionAbandonRequest,
+    VoiceSessionAnalyticsResponse,
     VoiceSessionCreateRequest,
     VoiceSessionDetailResponse,
     VoiceSessionFinalizeRequest,
     VoiceSessionFinalizeResponse,
     VoiceSessionSummaryResponse,
     VoiceTurnAcceptedResponse,
+    VoiceTurnConfirmRequest,
     VoiceTurnCreateFallbackRequest,
     VoiceTurnSummaryResponse,
     VoiceTurnUploadAcceptedResponse,
@@ -27,7 +29,15 @@ from app.schemas.voice_session_schemas import (
 from app.services.adapters.text.models import StoryOutput
 from app.services.memory_service import build_enhanced_memory_context
 from app.services.provider_router import generate_story_content, text_to_speech
-from app.services.story_service import create_story_from_result, validate_profile_and_universe
+from app.services.story_service import (
+    create_story_from_result,
+    generate_story_cover,
+    validate_profile_and_universe,
+)
+from app.services.voice_session_safety import (
+    check_assistant_output_safety,
+    check_user_transcript_safety,
+)
 from app.services.voice_session_storage import (
     build_turn_assistant_audio_path,
     read_session_audio,
@@ -51,6 +61,7 @@ def _default_story_state() -> dict[str, Any]:
         "narrative_segments": [],
         "safety_flags": [],
         "last_intent": None,
+        "final_summary": None,
     }
 
 
@@ -121,7 +132,9 @@ def _build_confirmation_message(
                 f"{normalized_transcript}。"
             )
         else:
-            natural_understanding = f"我现在先理解成你想「{_format_intent_label(detected_intent)}」。"
+            natural_understanding = (
+                f"我现在先理解成你想「{_format_intent_label(detected_intent)}」。"
+            )
 
     if "intent_unknown" in reasons:
         prefix = "我这一次还没有完全听懂。"
@@ -141,6 +154,34 @@ def _build_confirmation_message(
     )
 
 
+def _merge_unique_items(*values: list[str] | tuple[str, ...]) -> list[str]:
+    """Flatten ``values`` into one list of stripped, non-empty strings; first occurrence wins.
+
+    Items are passed through ``str()`` before stripping, so non-string entries are tolerated.
+    """
+    cleaned = (str(raw_item).strip() for group in values for raw_item in group)
+    # dict.fromkeys keeps first-seen order, so it doubles as an order-preserving de-duplication.
+    return list(dict.fromkeys(text for text in cleaned if text))
+
+
+def _confirmation_state_from_patch(story_patch: dict[str, Any] | None = None) -> str:
+    """Read the stored confirmation state, else derive it from ``requires_confirmation``."""
+    patch = story_patch or {}
+    stored_state = patch.get("confirmation_state")
+    if isinstance(stored_state, str):
+        return str(stored_state)
+    return "pending" if patch.get("requires_confirmation") else "not_needed"
+
+
+def _resolve_turn_safety_state(story_patch: dict[str, Any] | None = None) -> dict[str, Any]:
+    """Extract the safety fields of a turn patch, defaulting to no flags and not blocked."""
+    patch = story_patch if story_patch else {}
+    flags = list(patch.get("safety_flags") or ())
+    blocked = bool(patch.get("safety_blocked"))
+    message = patch.get("safety_message")
+    return {"safety_flags": flags, "safety_blocked": blocked, "safety_message": message}
+
+
 def _resolve_turn_confirmation_state(
     *,
     transcript_text: str | None,
@@ -154,6 +195,7 @@ def _resolve_turn_confirmation_state(
     confirmation_reason = patch.get("confirmation_reason")
     confirmation_message = patch.get("confirmation_message")
     understanding_summary = patch.get("understanding_summary")
+    confirmation_state = _confirmation_state_from_patch(patch)
 
     reasons: list[str] = []
     if detected_intent == "unknown":
@@ -188,11 +230,93 @@ def _resolve_turn_confirmation_state(
     return {
         "understanding_summary": understanding_summary,
         "requires_confirmation": bool(requires_confirmation),
+        "confirmation_state": confirmation_state,
         "confirmation_reason": confirmation_reason,
         "confirmation_message": confirmation_message,
     }
 
 
+def _turn_has_pending_confirmation(turn: VoiceTurn) -> bool:
+    """Tell whether ``turn`` still requires confirmation and is in the "pending" state."""
+    resolved = _resolve_turn_confirmation_state(
+        transcript_text=turn.user_transcript,
+        transcript_confidence=turn.transcript_confidence,
+        detected_intent=turn.detected_intent,
+        intent_confidence=turn.intent_confidence,
+        story_patch=turn.story_patch or {},
+    )
+    is_pending = resolved["confirmation_state"] == "pending"
+    return resolved["requires_confirmation"] and is_pending
+
+
+def _extract_first_sentence(text: str | None) -> str:
+    """Return the leading sentence of ``text`` without its terminator ("" for empty/None)."""
+    normalized = (text or "").strip().replace("\n", " ")
+    if not normalized:
+        return ""
+    # Cut at the earliest terminator; a fixed priority order would let a later "。" beat "！".
+    cut = next((i for i, char in enumerate(normalized) if char in "。！？.!?"), None)
+    return normalized if cut is None else normalized[:cut].strip()
+
+
+def _build_final_story_title(session: VoiceSession) -> str:
+    """Pick a title (max 24 chars): working title, premise, first sentence, then a default."""
+    fallback_title = "一起编织的睡前故事"
+    state = session.story_state or {}
+    opening_segment = (state.get("narrative_segments") or [None])[0]
+    options = (
+        session.working_title,
+        state.get("premise"),
+        _extract_first_sentence(opening_segment),
+        fallback_title,
+    )
+    # Trim surrounding whitespace and punctuation, then keep the first non-empty option.
+    trimmed = (str(option or "").strip(" \n\t。！？:：-") for option in options)
+    return next((title[:24] for title in trimmed if title), fallback_title)
+
+
+def _build_final_story_summary(session: VoiceSession) -> str:
+    """Summarise the session from the first sentences of its first and last segments.
+
+    Returns a fixed default sentence when no usable narrative segment exists.
+    """
+    default_summary = "这是一段由孩子和 DreamWeaver 一起共创的温柔故事。"
+    raw_segments = (session.story_state or {}).get("narrative_segments") or []
+    # story_state is an untyped dict: keep only real strings so a stray None/number is
+    # skipped instead of raising AttributeError on ``.strip()``.
+    segments = [s.strip() for s in raw_segments if isinstance(s, str) and s.strip()]
+    if not segments:
+        return default_summary
+    first_sentence = _extract_first_sentence(segments[0])
+    last_sentence = _extract_first_sentence(segments[-1])
+    if first_sentence and last_sentence and first_sentence != last_sentence:
+        return f"{first_sentence}。后来，{last_sentence}。"
+    return f"{first_sentence}。" if first_sentence else default_summary
+
+
+def _turn_counts_as_success(turn: VoiceTurn) -> bool:
+    """Count a turn as successful only if it reached a ready status without intervention.
+
+    Safety-blocked turns and turns left pending, re-recorded or switched to text do not count.
+    """
+    patch = turn.story_patch or {}
+    unresolved_states = ("pending", "retry_recording", "switch_to_text")
+    if patch.get("safety_blocked") or _confirmation_state_from_patch(patch) in unresolved_states:
+        return False
+    return turn.status in ("audio_ready", "narrative_ready")
+
+
+def _can_finalize_with_latest_turn(
+    session: VoiceSession,
+    latest_turn: VoiceTurn | None,
+) -> bool:
+    """Allow finalizing only when the session permits it and no confirmation is pending."""
+    if not _session_can_finalize(session):
+        return False
+    awaiting_confirmation = bool(latest_turn) and _turn_has_pending_confirmation(latest_turn)
+    return not awaiting_confirmation
+
+
 def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
     turn_patch = turn.story_patch or {}
     confirmation_state = _resolve_turn_confirmation_state(
@@ -202,6 +326,7 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
         intent_confidence=turn.intent_confidence,
         story_patch=turn_patch,
     )
+    safety_state = _resolve_turn_safety_state(turn_patch)
     return VoiceTurnSummaryResponse(
         id=turn.id,
         session_id=turn.session_id,
@@ -214,8 +339,12 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
         intent_confidence=turn.intent_confidence,
         understanding_summary=confirmation_state["understanding_summary"],
         requires_confirmation=confirmation_state["requires_confirmation"],
+        confirmation_state=confirmation_state["confirmation_state"],
         confirmation_reason=confirmation_state["confirmation_reason"],
         confirmation_message=confirmation_state["confirmation_message"],
+        safety_flags=safety_state["safety_flags"],
+        safety_blocked=safety_state["safety_blocked"],
+        safety_message=safety_state["safety_message"],
         assistant_text=turn.assistant_text,
         assistant_audio_ready=session_audio_exists(turn.assistant_audio_path),
         assistant_audio_url=_assistant_audio_url(
@@ -242,8 +371,13 @@ def _session_to_summary(
         latest_confirmation_state = {
             "understanding_summary": None,
             "requires_confirmation": False,
+            "confirmation_state": None,
             "confirmation_message": None,
         }
+        latest_safety_state = {
+            "safety_flags": [],
+            "safety_message": None,
+        }
     else:
         total_turns = total_turns if total_turns is not None else latest_turn.turn_index
         latest_confirmation_state = _resolve_turn_confirmation_state(
@@ -253,6 +387,7 @@ def _session_to_summary(
             intent_confidence=latest_turn.intent_confidence,
             story_patch=latest_turn.story_patch or {},
         )
+        latest_safety_state = _resolve_turn_safety_state(latest_turn.story_patch or {})
 
     return VoiceSessionSummaryResponse(
         id=session.id,
@@ -270,14 +405,17 @@ def _session_to_summary(
         latest_detected_intent=latest_turn.detected_intent if latest_turn else None,
         latest_understanding_summary=latest_confirmation_state["understanding_summary"],
         latest_requires_confirmation=latest_confirmation_state["requires_confirmation"],
+        latest_confirmation_state=latest_confirmation_state["confirmation_state"],
         latest_confirmation_message=latest_confirmation_state["confirmation_message"],
+        latest_safety_flags=latest_safety_state["safety_flags"],
+        latest_safety_message=latest_safety_state["safety_message"],
         latest_assistant_audio_ready=(
             session_audio_exists(latest_turn.assistant_audio_path) if latest_turn else False
         ),
         last_turn_status=latest_turn.status if latest_turn else None,
         transcription_mode_hint=settings.voice_transcription_mode,
         can_continue=_session_can_continue(session),
-        can_finalize=_session_can_finalize(session),
+        can_finalize=_can_finalize_with_latest_turn(session, latest_turn),
         last_error=session.last_error,
         created_at=session.created_at,
         updated_at=session.updated_at,
@@ -468,6 +606,7 @@ def _merge_story_state(
     transcript_text: str,
     intent: str,
     assistant_result: StoryOutput | None,
+    safety_flags: list[str] | None = None,
 ) -> tuple[dict[str, Any], dict[str, Any]]:
     current_state = _default_story_state() | (session.story_state or {})
     narrative_segments = list(current_state.get("narrative_segments") or [])
@@ -481,6 +620,10 @@ def _merge_story_state(
     current_state["narrative_segments"] = narrative_segments
     current_state["latest_direction"] = transcript_text
     current_state["last_intent"] = intent
+    current_state["safety_flags"] = _merge_unique_items(
+        list(current_state.get("safety_flags") or []),
+        list(safety_flags or []),
+    )
     if assistant_result and assistant_result.cover_prompt_suggestion:
         current_state["cover_prompt"] = assistant_result.cover_prompt_suggestion
 
@@ -491,10 +634,24 @@ def _merge_story_state(
         "working_title": assistant_result.title if assistant_result else session.working_title,
         "cover_prompt": current_state.get("cover_prompt"),
         "narrative_segments_count": len(narrative_segments),
+        "safety_flags": list(current_state.get("safety_flags") or []),
     }
     return current_state, patch
 
 
+async def _ensure_no_pending_confirmation(
+    db: AsyncSession,
+    *,
+    session: VoiceSession,
+) -> None:
+    """Raise HTTP 409 if the session's latest turn is still awaiting confirmation."""
+    previous_turn = await _get_latest_turn(db, session_id=session.id)
+    if not previous_turn or not _turn_has_pending_confirmation(previous_turn):
+        return
+    conflict_detail = "请先确认上一轮系统理解，或选择重说 / 改成文本输入后再继续。"
+    raise HTTPException(status_code=409, detail=conflict_detail)
+
+
 async def _create_pending_turn(
     db: AsyncSession,
     *,
@@ -511,6 +668,7 @@ async def _create_pending_turn(
             status_code=409,
             detail="Voice session is not ready for another turn.",
         )
+    await _ensure_no_pending_confirmation(db, session=session)
 
     next_turn_index = session.current_turn_index + 1
     detected_intent, intent_confidence = _detect_intent(
@@ -593,13 +751,18 @@ async def _process_pending_turn(
     assistant_result: StoryOutput | None = None
     detected_intent = turn.detected_intent
     intent_confidence = turn.intent_confidence
+    turn_patch = dict(turn.story_patch or {})
     confirmation_state = _resolve_turn_confirmation_state(
         transcript_text=transcript_text,
         transcript_confidence=turn.transcript_confidence,
         detected_intent=detected_intent,
         intent_confidence=intent_confidence,
-        story_patch=turn.story_patch or {},
+        story_patch=turn_patch,
     )
+    transcript_safety = check_user_transcript_safety(transcript_text)
+    assistant_safety_message: str | None = None
+    safety_flags: list[str] = []
+    transcript_blocked = False
 
     try:
         await _record_session_event(
@@ -669,6 +832,70 @@ async def _process_pending_turn(
                     "requires_confirmation": True,
                 },
             )
+        elif not transcript_safety.is_safe:
+            transcript_blocked = True
+            safety_flags = list(transcript_safety.flags)
+            current_state = _default_story_state() | (session.story_state or {})
+            current_state["safety_flags"] = _merge_unique_items(
+                list(current_state.get("safety_flags") or []),
+                safety_flags,
+            )
+            assistant_text = transcript_safety.replacement_text or transcript_safety.message
+            turn.story_patch = {
+                **turn_patch,
+                "intent": detected_intent,
+                "transcript_text": transcript_text,
+                "segment_added": False,
+                "working_title": session.working_title,
+                "cover_prompt": current_state.get("cover_prompt"),
+                "narrative_segments_count": len(
+                    list(current_state.get("narrative_segments") or [])
+                ),
+                "requires_confirmation": False,
+                "confirmation_state": turn_patch.get("confirmation_state", "not_needed"),
+                "understanding_summary": confirmation_state["understanding_summary"],
+                "safety_flags": safety_flags,
+                "safety_blocked": True,
+                "safety_message": transcript_safety.message,
+            }
+            turn.assistant_text = assistant_text
+            turn.status = "narrative_ready"
+            turn.error_message = None
+            session.story_state = current_state
+            session.latest_assistant_text = assistant_text
+            session.status = "waiting_user"
+            session.last_error = None
+            session.updated_at = _utcnow()
+            await db.commit()
+            await db.refresh(session)
+            await db.refresh(turn)
+
+            await _record_session_event(
+                db,
+                session_id=session.id,
+                turn_id=turn.id,
+                event_type="safety_intervention_requested",
+                status="blocked",
+                message="Unsafe user transcript was redirected to a child-friendly path.",
+                metadata={
+                    "stage": "user_input",
+                    "safety_flags": safety_flags,
+                },
+            )
+            await _record_session_event(
+                db,
+                session_id=session.id,
+                turn_id=turn.id,
+                event_type="assistant_text_ready",
+                status="succeeded",
+                message="Assistant safety redirect generated.",
+                metadata={
+                    "assistant_text_length": len(assistant_text or ""),
+                    "working_title": session.working_title,
+                    "requires_confirmation": False,
+                    "safety_flags": safety_flags,
+                },
+            )
         elif detected_intent == "save_story":
             assistant_text = "好的，这个故事已经准备好保存到故事库了。"
         elif detected_intent == "end_story":
@@ -681,23 +908,47 @@ async def _process_pending_turn(
                 intent=detected_intent,
             )
             assistant_text = assistant_result.story_text.strip()
+            output_safety = check_assistant_output_safety(
+                assistant_text,
+                premise=str((session.story_state or {}).get("premise") or ""),
+            )
+            if not output_safety.is_safe:
+                safety_flags = _merge_unique_items(safety_flags, output_safety.flags)
+                assistant_safety_message = output_safety.message
+                assistant_text = output_safety.replacement_text or assistant_text
+                assistant_result = StoryOutput(
+                    mode=assistant_result.mode,
+                    title=assistant_result.title,
+                    story_text=assistant_text,
+                    cover_prompt_suggestion=assistant_result.cover_prompt_suggestion,
+                )
 
-        if not confirmation_state["requires_confirmation"]:
+        if not confirmation_state["requires_confirmation"] and not transcript_blocked:
             merged_state, story_patch = _merge_story_state(
                 session,
                 transcript_text=transcript_text,
                 intent=detected_intent,
                 assistant_result=assistant_result,
+                safety_flags=safety_flags,
             )
-            story_patch["transcription_provider"] = (
-                (turn.story_patch or {}).get("transcription_provider")
-            )
+            story_patch["transcription_provider"] = turn_patch.get("transcription_provider")
+            story_patch["requires_confirmation"] = False
+            story_patch["confirmation_state"] = turn_patch.get("confirmation_state", "not_needed")
+            story_patch["understanding_summary"] = confirmation_state["understanding_summary"]
+            if turn_patch.get("confirmation_reason"):
+                story_patch["confirmation_reason"] = turn_patch.get("confirmation_reason")
+            story_patch["confirmation_message"] = None
+            story_patch["safety_flags"] = safety_flags
+            story_patch["safety_blocked"] = False
+            story_patch["safety_message"] = assistant_safety_message
             turn.story_patch = story_patch
             turn.assistant_text = assistant_text
             turn.status = "narrative_ready"
+            turn.error_message = None
             session.story_state = merged_state
             session.latest_assistant_text = assistant_text
             session.status = "waiting_user"
+            session.last_error = None
             session.updated_at = _utcnow()
             if assistant_result and assistant_result.title and not session.working_title:
                 session.working_title = assistant_result.title
@@ -714,6 +965,19 @@ async def _process_pending_turn(
                 message="Story state updated after one turn.",
                 metadata=story_patch,
             )
+            if safety_flags:
+                await _record_session_event(
+                    db,
+                    session_id=session.id,
+                    turn_id=turn.id,
+                    event_type="safety_intervention_requested",
+                    status="rewritten",
+                    message="Assistant output was rewritten to keep the story child-friendly.",
+                    metadata={
+                        "stage": "assistant_output",
+                        "safety_flags": safety_flags,
+                    },
+                )
             await _record_session_event(
                 db,
                 session_id=session.id,
@@ -725,6 +989,7 @@ async def _process_pending_turn(
                     "assistant_text_length": len(assistant_text or ""),
                     "working_title": session.working_title,
                     "requires_confirmation": False,
+                    "safety_flags": safety_flags,
                 },
             )
     except Exception as exc:
@@ -805,6 +1070,12 @@ async def _process_pending_turn(
     return turn.status
 
 
+def _confirmation_resolution_text(action: str) -> str:  # reply text for a resolution action
+    if action == "retry_recording":  # any other action falls through to the text-input reply
+        return "好的，我们把这一轮先撤回，你可以重新录一遍，我会重新认真听。"
+    return "好的，我们先切换成文本输入。你可以直接在下面把这一轮想法改写清楚，我们再继续讲。"
+
+
 async def list_voice_sessions_service(
     user_id: str,
     db: AsyncSession,
@@ -871,6 +1142,84 @@ async def get_latest_active_voice_session_service(
     )
 
 
+async def get_voice_session_analytics_service(
+    user_id: str,
+    db: AsyncSession,
+    *,
+    days: int | None = 30,
+) -> VoiceSessionAnalyticsResponse:
+    """Aggregate one user's voice-session, turn and event counts.
+
+    With ``days`` set, only rows whose own ``created_at`` falls inside the last
+    ``days`` days are counted; ``None`` counts everything. Both rates are
+    rounded to 4 places and are 0.0 when their denominator is zero.
+    """
+    # Use the module clock helper so the cutoff follows the same convention as
+    # the timestamps this module writes (NOTE(review): confirm for created_at).
+    cutoff = _utcnow() - timedelta(days=days) if days is not None else None
+
+    session_query = select(VoiceSession).where(VoiceSession.user_id == user_id)
+    turn_query = (
+        select(VoiceTurn)
+        .join(VoiceSession, VoiceTurn.session_id == VoiceSession.id)
+        .where(VoiceSession.user_id == user_id)
+    )
+    event_query = (
+        select(VoiceSessionEvent)
+        .join(VoiceSession, VoiceSessionEvent.session_id == VoiceSession.id)
+        .where(VoiceSession.user_id == user_id)
+    )
+
+    if cutoff is not None:
+        session_query = session_query.where(VoiceSession.created_at >= cutoff)
+        turn_query = turn_query.where(VoiceTurn.created_at >= cutoff)
+        event_query = event_query.where(VoiceSessionEvent.created_at >= cutoff)
+
+    sessions = (await db.execute(session_query)).scalars().all()
+    turns = (await db.execute(turn_query)).scalars().all()
+    events = (await db.execute(event_query)).scalars().all()
+
+    statuses = [session.status for session in sessions]
+    event_types = [event.event_type for event in events]
+
+    total_sessions = len(sessions)
+    active_sessions = sum(status in CONTINUABLE_SESSION_STATUSES for status in statuses)
+    finalized_sessions = statuses.count("completed")
+    abandoned_sessions = statuses.count("abandoned")
+    total_turns = len(turns)
+    successful_turns = sum(1 for turn in turns if _turn_counts_as_success(turn))
+    failed_turns = sum(1 for turn in turns if turn.status == "failed")
+    asr_failures = event_types.count("turn_transcription_failed")
+    tts_failures = sum(
+        kind in {"assistant_audio_failed", "assistant_audio_retry_failed"}
+        for kind in event_types
+    )
+    low_confidence_turns = event_types.count("turn_confirmation_requested")
+    safety_interventions = event_types.count("safety_intervention_requested")
+
+    turn_success_rate = round(successful_turns / total_turns, 4) if total_turns else 0.0
+    finalize_conversion_rate = (
+        round(finalized_sessions / total_sessions, 4) if total_sessions else 0.0
+    )
+
+    return VoiceSessionAnalyticsResponse(
+        window_days=days,
+        total_sessions=total_sessions,
+        active_sessions=active_sessions,
+        finalized_sessions=finalized_sessions,
+        abandoned_sessions=abandoned_sessions,
+        total_turns=total_turns,
+        successful_turns=successful_turns,
+        failed_turns=failed_turns,
+        asr_failures=asr_failures,
+        tts_failures=tts_failures,
+        low_confidence_turns=low_confidence_turns,
+        safety_interventions=safety_interventions,
+        turn_success_rate=turn_success_rate,
+        finalize_conversion_rate=finalize_conversion_rate,
+    )
+
+
 async def create_voice_session_service(
     request: VoiceSessionCreateRequest,
     user_id: str,
@@ -1009,6 +1358,7 @@ async def create_voice_turn_from_upload_service(
             status_code=409,
             detail="Voice session is not ready for another turn.",
         )
+    await _ensure_no_pending_confirmation(db, session=session)
     if not audio_bytes:
         raise HTTPException(status_code=400, detail="上传音频为空，请重新录音后再试。")
     if len(audio_bytes) > settings.voice_turn_max_upload_bytes:
@@ -1024,12 +1374,32 @@ async def create_voice_turn_from_upload_service(
         mime_type=mime_type,
         audio_data=audio_bytes,
     )
-    transcription = await transcribe_voice_audio(
-        audio_bytes=audio_bytes,
-        file_name=file_name,
-        mime_type=mime_type,
-        transcript_hint=transcript_hint,
-    )
+    try:
+        transcription = await transcribe_voice_audio(
+            audio_bytes=audio_bytes,
+            file_name=file_name,
+            mime_type=mime_type,
+            transcript_hint=transcript_hint,
+        )
+    except HTTPException as exc:
+        session.last_error = str(exc.detail)
+        session.updated_at = _utcnow()
+        await db.commit()
+        await db.refresh(session)
+        await _record_session_event(
+            db,
+            session_id=session.id,
+            turn_id=None,
+            event_type="turn_transcription_failed",
+            status="failed",
+            message="Voice transcription failed before one turn could be created.",
+            metadata={
+                "mime_type": mime_type,
+                "audio_path": user_audio_path,
+                "error": str(exc.detail),
+            },
+        )
+        raise
     session, turn = await _create_pending_turn(
         db,
         session=session,
@@ -1083,6 +1453,86 @@ async def retry_voice_turn_service(
     )
 
 
+async def resolve_voice_turn_confirmation_service(
+    session_id: str,
+    turn_id: str,
+    request: VoiceTurnConfirmRequest,
+    user_id: str,
+    db: AsyncSession,
+) -> VoiceTurnSummaryResponse:
+    """Resolve the pending confirmation on the latest turn of a session.
+
+    ``accept`` re-runs the stored transcript through ``_process_pending_turn``;
+    any other action swaps the assistant reply for guidance text and returns
+    the session to ``waiting_user``. Raises a 409 ``HTTPException`` when the
+    turn is not the latest, needs no confirmation, or has no transcript.
+    """
+    session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
+    turn = await _get_owned_turn(db, session_id=session_id, turn_id=turn_id, user_id=user_id)
+    if turn.turn_index != session.current_turn_index:
+        raise HTTPException(status_code=409, detail="Only the latest turn can be confirmed.")
+    if not _turn_has_pending_confirmation(turn):
+        raise HTTPException(status_code=409, detail="This turn does not need confirmation.")
+    if not turn.user_transcript:
+        raise HTTPException(status_code=409, detail="This turn has no transcript to confirm.")
+
+    accepted = request.action == "accept"
+    turn.story_patch = {
+        **(turn.story_patch or {}),
+        "requires_confirmation": False,
+        "confirmation_state": "accepted" if accepted else request.action,
+        "confirmation_message": None,
+    }
+    turn.error_message = None
+    session.last_error = None
+    session.updated_at = _utcnow()
+
+    if accepted:
+        session.status = "processing_turn"
+        await db.commit()
+        await db.refresh(session)
+        await db.refresh(turn)
+        await _record_session_event(
+            db,
+            session_id=session.id,
+            turn_id=turn.id,
+            event_type="turn_confirmation_accepted",
+            status="succeeded",
+            message=(
+                "Parent confirmed the current interpretation "
+                "and allowed the story to continue."
+            ),
+            metadata={"turn_index": turn.turn_index},
+        )
+        await _process_pending_turn(
+            db, session=session, turn=turn, transcript_text=turn.user_transcript, user_id=user_id
+        )
+        await db.refresh(turn)
+        return _turn_to_summary(turn)
+
+    # Any other action does not continue the transcript: reply with guidance text instead.
+    resolution_text = _confirmation_resolution_text(request.action)
+    turn.assistant_text = resolution_text
+    turn.assistant_audio_path = None
+    turn.assistant_audio_duration_ms = None
+    turn.status = "narrative_ready"
+    session.status = "waiting_user"
+    session.latest_assistant_text = resolution_text
+    await db.commit()
+    await db.refresh(session)
+    await db.refresh(turn)
+    await _record_session_event(
+        db,
+        session_id=session.id,
+        turn_id=turn.id,
+        event_type=f"turn_confirmation_{request.action}",
+        status="succeeded",
+        message="Pending confirmation was resolved without continuing the current transcript.",
+        metadata={"turn_index": turn.turn_index, "action": request.action},
+    )
+    return _turn_to_summary(turn)
+
+
 async def retry_voice_turn_audio_service(
     session_id: str,
     turn_id: str,
@@ -1202,9 +1652,10 @@ async def finalize_voice_session_service(
         )
 
     session = await _get_owned_session(db, session_id=session_id, user_id=user_id)
+    latest_turn = await _get_latest_turn(db, session_id=session.id)
     if session.status in FINAL_SESSION_STATUSES:
         raise HTTPException(status_code=409, detail="Voice session is already closed.")
-    if not _session_can_finalize(session):
+    if not _can_finalize_with_latest_turn(session, latest_turn):
         raise HTTPException(status_code=409, detail="Voice session is not ready to finalize.")
 
     session.status = "finalizing_story"
@@ -1229,9 +1680,19 @@ async def finalize_voice_session_service(
     if not final_story_text:
         raise HTTPException(status_code=409, detail="Voice session has no narrative to save.")
 
+    final_title = _build_final_story_title(session)
+    final_summary = _build_final_story_summary(session)
+    story_state = {
+        **story_state,
+        "final_summary": final_summary,
+        "final_title": final_title,
+    }
+    session.story_state = story_state
+    session.working_title = final_title
+
     story_result = StoryOutput(
         mode="generated",
-        title=session.working_title or "一起编织的睡前故事",
+        title=final_title,
         story_text=final_story_text,
         cover_prompt_suggestion=(
             (story_state.get("cover_prompt") or "") if request.generate_cover else ""
@@ -1246,6 +1707,36 @@ async def finalize_voice_session_service(
         db=db,
     )
 
+    generation_job_id: str | None = None
+    if request.generate_cover and story.cover_prompt:
+        try:
+            await generate_story_cover(story.id, user_id, db)
+            await _record_session_event(
+                db,
+                session_id=session.id,
+                turn_id=None,
+                event_type="session_cover_generation_succeeded",
+                status="succeeded",
+                message="Finalized story cover was generated after session save.",
+                metadata={"story_id": story.id},
+            )
+        except HTTPException as exc:
+            await _record_session_event(
+                db,
+                session_id=session.id,
+                turn_id=None,
+                event_type="session_cover_generation_failed",
+                status="failed",
+                message="Finalized story cover generation failed after session save.",
+                metadata={"story_id": story.id, "error": str(exc.detail)},
+            )
+            logger.warning(
+                "voice_session_finalize_cover_failed",
+                session_id=session.id,
+                story_id=story.id,
+                error=str(exc.detail),
+            )
+
     session.final_story_id = story.id
     session.status = "completed"
     session.last_error = None
@@ -1260,14 +1751,18 @@ async def finalize_voice_session_service(
         event_type="session_saved_as_story",
         status="succeeded",
         message="Voice session finalized into a story.",
-        metadata={"story_id": story.id},
+        metadata={
+            "story_id": story.id,
+            "final_title": final_title,
+            "final_summary": final_summary,
+        },
     )
 
     return VoiceSessionFinalizeResponse(
         session_id=session.id,
         status=session.status,
         story_id=story.id,
-        generation_job_id=None,
+        generation_job_id=generation_job_id,
     )
 
 
diff --git a/backend/tests/test_voice_sessions.py b/backend/tests/test_voice_sessions.py
index 06d1630..edd01ff 100644
--- a/backend/tests/test_voice_sessions.py
+++ b/backend/tests/test_voice_sessions.py
@@ -1,5 +1,6 @@
 from unittest.mock import AsyncMock, patch
 
+from fastapi import HTTPException
 from httpx import ASGITransport, AsyncClient
 
 from app.core.config import settings
@@ -97,6 +98,10 @@ async def test_voice_session_correct_turn_and_finalize_to_story(
             "app.services.voice_session_service.text_to_speech",
             new_callable=AsyncMock,
         ) as mock_tts,
+        patch(
+            "app.services.voice_session_service.generate_story_cover",
+            new_callable=AsyncMock,
+        ) as mock_generate_cover,
     ):
         mock_generate.side_effect = [
             StoryOutput(
@@ -113,6 +118,7 @@ async def test_voice_session_correct_turn_and_finalize_to_story(
             ),
         ]
         mock_tts.side_effect = [b"turn-1-audio", b"turn-2-audio"]
+        mock_generate_cover.return_value = "https://example.com/voice-cover.png"
 
         transport = ASGITransport(app=app)
         try:
@@ -165,6 +171,8 @@ async def test_voice_session_correct_turn_and_finalize_to_story(
                 assert session_data["status"] == "completed"
                 assert session_data["final_story_id"] == story_id
                 assert session_data["can_continue"] is False
+                assert session_data["story_state"]["final_summary"]
+                mock_generate_cover.assert_awaited_once()
         finally:
             app.dependency_overrides.clear()
 
@@ -328,14 +336,22 @@ async def test_voice_session_low_confidence_turn_requests_confirmation(
                 turn_data = response.json()
                 assert turn_data["status"] == "audio_ready"
                 assert turn_data["requires_confirmation"] is True
+                assert turn_data["confirmation_state"] == "pending"
                 assert turn_data["understanding_summary"].startswith("本轮系统理解为")
                 assert "请家长帮忙确认" in turn_data["confirmation_message"]
                 assert turn_data["assistant_text"] == turn_data["confirmation_message"]
 
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/fallback",
+                    json={"transcript_text": "我要直接继续下一轮"},
+                )
+                assert response.status_code == 409
+
                 response = await client.get(f"/api/voice-sessions/{session_id}")
                 assert response.status_code == 200
                 session_data = response.json()
                 assert session_data["latest_requires_confirmation"] is True
+                assert session_data["latest_confirmation_state"] == "pending"
                 assert "请家长帮忙确认" in session_data["latest_confirmation_message"]
                 assert session_data["can_finalize"] is False
                 assert session_data["story_state"]["narrative_segments"] == []
@@ -349,6 +365,305 @@ async def test_voice_session_low_confidence_turn_requests_confirmation(
             app.dependency_overrides.clear()
 
 
+async def test_voice_session_confirmation_accept_continues_original_turn(
+    db_session,
+    auth_token,
+):  # "accept" on a pending confirmation must continue that same turn
+    async def override_get_db():  # serve the test session as the app's DB
+        yield db_session
+
+    app.dependency_overrides[get_db] = override_get_db
+
+    with (
+        patch(
+            "app.services.voice_session_service.generate_story_content",
+            new_callable=AsyncMock,
+        ) as mock_generate,
+        patch(
+            "app.services.voice_session_service.text_to_speech",
+            new_callable=AsyncMock,
+        ) as mock_tts,
+        patch(
+            "app.services.voice_session_service.transcribe_voice_audio",
+            new_callable=AsyncMock,
+        ) as mock_transcribe,
+    ):
+        mock_generate.return_value = StoryOutput(
+            mode="generated",
+            title="小恐龙的星光之旅",
+            story_text="小恐龙踩着亮晶晶的石头，朝着会唱歌的山谷慢慢走去。",
+            cover_prompt_suggestion="A glowing little dinosaur walking into a musical valley",
+        )
+        mock_tts.side_effect = [b"confirmation-audio", b"story-audio"]  # one item per TTS call
+        mock_transcribe.return_value = VoiceTranscriptionResult(
+            transcript_text="我想听一个会发光的小恐龙故事",
+            confidence=0.44,  # the confirm call below needs this turn to be pending
+            provider="openai",
+        )
+
+        transport = ASGITransport(app=app)
+        try:
+            async with AsyncClient(transport=transport, base_url="http://test") as client:
+                client.cookies.set("access_token", auth_token)
+
+                response = await client.post("/api/voice-sessions", json={})
+                session_id = response.json()["id"]
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns",
+                    files={
+                        "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"),
+                    },
+                )
+                turn_id = response.json()["turn_id"]
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/{turn_id}/confirm",
+                    json={"action": "accept"},
+                )
+                assert response.status_code == 200
+                turn_data = response.json()
+                assert turn_data["status"] == "audio_ready"
+                assert turn_data["requires_confirmation"] is False
+                assert turn_data["confirmation_state"] == "accepted"
+                assert "小恐龙踩着亮晶晶的石头" in turn_data["assistant_text"]
+
+                response = await client.get(f"/api/voice-sessions/{session_id}")
+                session_data = response.json()
+                assert session_data["latest_requires_confirmation"] is False
+                assert session_data["can_finalize"] is True
+                assert len(session_data["story_state"]["narrative_segments"]) == 1
+        finally:
+            app.dependency_overrides.clear()  # always undo the get_db override
+
+
+async def test_voice_session_confirmation_switch_to_text_allows_follow_up_turn(
+    db_session,
+    auth_token,
+):  # "switch_to_text" must clear the confirmation so a text turn is accepted
+    async def override_get_db():  # serve the test session as the app's DB
+        yield db_session
+
+    app.dependency_overrides[get_db] = override_get_db
+
+    with (
+        patch(
+            "app.services.voice_session_service.generate_story_content",
+            new_callable=AsyncMock,
+        ) as mock_generate,
+        patch(
+            "app.services.voice_session_service.text_to_speech",
+            new_callable=AsyncMock,
+        ) as mock_tts,
+        patch(
+            "app.services.voice_session_service.transcribe_voice_audio",
+            new_callable=AsyncMock,
+        ) as mock_transcribe,
+    ):
+        mock_generate.return_value = StoryOutput(
+            mode="generated",
+            title="文字修正后的故事",
+            story_text="小熊轻轻推开了云朵门，发现里面藏着一座会发光的图书馆。",
+            cover_prompt_suggestion="A little bear opening a glowing cloud library door",
+        )
+        mock_tts.side_effect = [b"confirmation-audio", b"story-audio"]  # one item per TTS call
+        mock_transcribe.return_value = VoiceTranscriptionResult(
+            transcript_text="我想听一个小熊和云朵门的故事",
+            confidence=0.4,  # the confirm call below needs this turn to be pending
+            provider="openai",
+        )
+
+        transport = ASGITransport(app=app)
+        try:
+            async with AsyncClient(transport=transport, base_url="http://test") as client:
+                client.cookies.set("access_token", auth_token)
+
+                response = await client.post("/api/voice-sessions", json={})
+                session_id = response.json()["id"]
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns",
+                    files={
+                        "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"),
+                    },
+                )
+                turn_id = response.json()["turn_id"]
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/{turn_id}/confirm",
+                    json={"action": "switch_to_text"},
+                )
+                assert response.status_code == 200
+                assert response.json()["confirmation_state"] == "switch_to_text"
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/fallback",
+                    json={"transcript_text": "我想听一个小熊打开云朵门去冒险的故事"},
+                )
+                assert response.status_code == 202  # follow-up text turn is no longer blocked
+
+                response = await client.get(f"/api/voice-sessions/{session_id}")
+                session_data = response.json()
+                assert session_data["latest_requires_confirmation"] is False
+                assert session_data["can_finalize"] is True
+        finally:
+            app.dependency_overrides.clear()  # always undo the get_db override
+
+
+async def test_voice_session_unsafe_transcript_is_redirected_safely(
+    db_session,
+    auth_token,
+):  # an unsafe text turn is flagged, answered safely, and never reaches generation
+    async def override_get_db():  # serve the test session as the app's DB
+        yield db_session
+
+    app.dependency_overrides[get_db] = override_get_db
+
+    with patch(
+        "app.services.voice_session_service.text_to_speech",
+        new_callable=AsyncMock,
+    ) as mock_tts, patch(
+        "app.services.voice_session_service.generate_story_content",
+        new_callable=AsyncMock,
+    ) as mock_generate:
+        mock_tts.return_value = b"safe-redirect-audio"
+
+        transport = ASGITransport(app=app)
+        try:
+            async with AsyncClient(transport=transport, base_url="http://test") as client:
+                client.cookies.set("access_token", auth_token)
+
+                response = await client.post("/api/voice-sessions", json={})
+                session_id = response.json()["id"]
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/fallback",
+                    json={"transcript_text": "我想听一个拿着炸弹互相打的故事"},
+                )
+                assert response.status_code == 202
+                turn_id = response.json()["turn_id"]
+
+                response = await client.get(
+                    f"/api/voice-sessions/{session_id}/turns/{turn_id}"
+                )
+                turn_data = response.json()
+                assert turn_data["safety_blocked"] is True
+                assert "violence" in turn_data["safety_flags"]
+                assert "温柔、安全" in turn_data["assistant_text"]
+
+                response = await client.get(f"/api/voice-sessions/{session_id}")
+                session_data = response.json()
+                assert session_data["story_state"]["narrative_segments"] == []  # nothing merged
+                assert "violence" in session_data["latest_safety_flags"]
+
+                mock_generate.assert_not_awaited()  # blocked input must skip story generation
+        finally:
+            app.dependency_overrides.clear()  # always undo the get_db override
+
+
+async def test_voice_session_analytics_summarize_failures_and_confirmations(
+    db_session,
+    auth_token,
+):  # drives one session through TTS/ASR failures and a confirmation, then reads analytics
+    async def override_get_db():  # serve the test session as the app's DB
+        yield db_session
+
+    app.dependency_overrides[get_db] = override_get_db
+
+    with (
+        patch(
+            "app.services.voice_session_service.generate_story_content",
+            new_callable=AsyncMock,
+        ) as mock_generate,
+        patch(
+            "app.services.voice_session_service.text_to_speech",
+            new_callable=AsyncMock,
+        ) as mock_tts,
+        patch(
+            "app.services.voice_session_service.transcribe_voice_audio",
+            new_callable=AsyncMock,
+        ) as mock_transcribe,
+    ):
+        mock_generate.side_effect = [
+            StoryOutput(
+                mode="generated",
+                title="安全故事",
+                story_text="第一段安全故事。",
+                cover_prompt_suggestion="safe cover",
+            ),
+            StoryOutput(
+                mode="generated",
+                title="确认后继续",
+                story_text="第二段确认后顺利继续。",
+                cover_prompt_suggestion="safe cover 2",
+            ),
+        ]
+        mock_tts.side_effect = [  # consumed in call order, so request order below matters
+            RuntimeError("tts down"),  # first TTS call fails; tts_failures is asserted below
+            b"confirmation-audio",
+            b"confirmed-story-audio",
+        ]
+        mock_transcribe.side_effect = [  # first upload transcribes, second upload raises
+            VoiceTranscriptionResult(
+                transcript_text="我想听一个会发光的小恐龙故事",
+                confidence=0.41,
+                provider="openai",
+            ),
+            HTTPException(status_code=503, detail="语音转写服务暂时不可用，请稍后重试。"),
+        ]
+
+        transport = ASGITransport(app=app)
+        try:
+            async with AsyncClient(transport=transport, base_url="http://test") as client:
+                client.cookies.set("access_token", auth_token)
+
+                response = await client.post("/api/voice-sessions", json={})
+                session_id = response.json()["id"]
+
+                await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/fallback",
+                    json={"transcript_text": "先给我一段故事"},
+                )
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns",
+                    files={
+                        "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"),
+                    },
+                )
+                turn_id = response.json()["turn_id"]
+                await client.post(
+                    f"/api/voice-sessions/{session_id}/turns/{turn_id}/confirm",
+                    json={"action": "accept"},
+                )
+
+                response = await client.post(
+                    f"/api/voice-sessions/{session_id}/turns",
+                    files={
+                        "audio_file": ("turn-2.webm", b"fake-webm-audio-2", "audio/webm"),
+                    },
+                )
+                assert response.status_code == 503  # the mocked ASR failure surfaces as-is
+
+                await client.post(
+                    f"/api/voice-sessions/{session_id}/finalize",
+                    json={"save_story": True, "generate_cover": False},
+                )
+
+                response = await client.get("/api/voice-sessions/analytics?days=30")
+                assert response.status_code == 200
+                analytics = response.json()
+                assert analytics["total_sessions"] >= 1
+                assert analytics["successful_turns"] >= 1
+                assert analytics["tts_failures"] >= 1
+                assert analytics["low_confidence_turns"] >= 1
+                assert analytics["asr_failures"] >= 1
+                assert analytics["finalized_sessions"] >= 1
+                assert analytics["finalize_conversion_rate"] > 0
+        finally:
+            app.dependency_overrides.clear()  # always undo the get_db override
+
+
 async def test_voice_session_list_orders_recent_sessions_first(
     db_session,
     auth_token,
diff --git a/docs/technical/voice-co-creation-phase-a-migration-api-draft.md b/docs/technical/voice-co-creation-phase-a-migration-api-draft.md
index 3792c58..b3277a9 100644
--- a/docs/technical/voice-co-creation-phase-a-migration-api-draft.md
+++ b/docs/technical/voice-co-creation-phase-a-migration-api-draft.md
@@ -568,6 +568,22 @@ async def create_voice_turn_from_text(...)
 async def get_voice_turn(...)
 ```
 
+### 解决低置信度确认
+
+```python
+@router.post(
+    "/voice-sessions/{session_id}/turns/{turn_id}/confirm",
+    response_model=VoiceTurnSummaryResponse,
+)
+async def resolve_voice_turn_confirmation(...)
+```
+
+支持：
+
+- `accept`: 按当前理解继续本轮
+- `retry_recording`: 撤回当前理解，重新录音
+- `switch_to_text`: 撤回当前理解，切换到文本输入
+
 ### 结束并保存
 
 ```python
@@ -578,6 +594,13 @@ async def get_voice_turn(...)
 async def finalize_voice_session(...)
 ```
 
+### 获取语音共创 analytics
+
+```python
+@router.get("/voice-sessions/analytics", response_model=VoiceSessionAnalyticsResponse)
+async def get_voice_session_analytics(...)
+```
+
 ### 放弃会话
 
 ```python
diff --git a/docs/technical/voice-co-creation-phase-a-tech-spec.md b/docs/technical/voice-co-creation-phase-a-tech-spec.md
index 9968881..649c333 100644
--- a/docs/technical/voice-co-creation-phase-a-tech-spec.md
+++ b/docs/technical/voice-co-creation-phase-a-tech-spec.md
@@ -23,8 +23,12 @@
 本轮新增收束：
 
 - 当 `transcript_confidence` 或 `intent_confidence` 偏低时，后端优先返回确认提示，而不是直接把这一轮写进故事正文
+- 已补完整确认流：支持“按这个理解继续”“重说本轮”“改成文本输入”
 - 前端明确展示“本轮系统理解为”与“建议家长确认后再继续”提示
 - 低置信度确认链路已有后端测试覆盖，可作为下一阶段继续接 ASR 与更细确认交互的基础
+- 已新增用户转写安全检查、assistant 输出柔性改写与 `safety_flags` 事件记录
+- finalize 会生成更稳定的标题/摘要，并在条件允许时自动衔接封面补全
+- 已新增 `voice session analytics` 聚合指标，可跟踪 turn 成功率、ASR/TTS 失败、低置信度触发和 finalize 转化率
 
 Phase A 的核心目标不是做“完全实时的语音陪伴”，而是验证以下最小闭环：
 
diff --git a/frontend/src/types/voiceSession.ts b/frontend/src/types/voiceSession.ts
index d6fd3e8..860911c 100644
--- a/frontend/src/types/voiceSession.ts
+++ b/frontend/src/types/voiceSession.ts
@@ -10,8 +10,12 @@ export interface VoiceTurnSummary {
   intent_confidence: number | null
   understanding_summary: string | null
   requires_confirmation: boolean
+  confirmation_state: string
   confirmation_reason: string | null
   confirmation_message: string | null
+  safety_flags: string[]
+  safety_blocked: boolean
+  safety_message: string | null
   assistant_text: string | null
   assistant_audio_ready: boolean
   assistant_audio_url: string | null
@@ -49,7 +53,10 @@ export interface VoiceSessionSummary {
   latest_detected_intent: string | null
   latest_understanding_summary: string | null
   latest_requires_confirmation: boolean
+  latest_confirmation_state: string | null
   latest_confirmation_message: string | null
+  latest_safety_flags: string[]
+  latest_safety_message: string | null
   latest_assistant_audio_ready: boolean
   last_turn_status: string | null
   transcription_mode_hint: string | null
@@ -71,6 +78,23 @@ export interface VoiceTurnAcceptedResponse {
   status: string
 }
 
+export interface VoiceSessionAnalytics {
+  window_days: number | null // echoes the requested `days`; null when no time window was applied
+  total_sessions: number
+  active_sessions: number
+  finalized_sessions: number // sessions whose status is "completed"
+  abandoned_sessions: number
+  total_turns: number
+  successful_turns: number
+  failed_turns: number
+  asr_failures: number // count of "turn_transcription_failed" events
+  tts_failures: number // count of assistant audio (retry) failure events
+  low_confidence_turns: number // count of "turn_confirmation_requested" events
+  safety_interventions: number // count of "safety_intervention_requested" events
+  turn_success_rate: number // successful_turns / total_turns as a fraction; 0 with no turns
+  finalize_conversion_rate: number // finalized_sessions / total_sessions; 0 with no sessions
+}
+
 export interface VoiceTurnUploadAcceptedResponse extends VoiceTurnAcceptedResponse {
   transcription_provider: string | null
 }
diff --git a/frontend/src/views/VoiceStudio.vue b/frontend/src/views/VoiceStudio.vue
index 28e517a..3308481 100644
--- a/frontend/src/views/VoiceStudio.vue
+++ b/frontend/src/views/VoiceStudio.vue
@@ -4,6 +4,7 @@ import { useRouter } from 'vue-router'
 import { api } from '../api/client'
 import { useUserStore } from '../stores/user'
 import type {
+  VoiceSessionAnalytics,
   VoiceSessionDetail,
   VoiceSessionFinalizeResponse,
   VoiceSessionSummary,
@@ -43,6 +44,7 @@ const userStore = useUserStore()
 
 const sessions = ref<VoiceSessionSummary[]>([])
 const activeSession = ref<VoiceSessionDetail | null>(null)
+const voiceAnalytics = ref<VoiceSessionAnalytics | null>(null)
 const profiles = ref<ChildProfile[]>([])
 const universes = ref<StoryUniverse[]>([])
 const selectedProfileId = ref('')
@@ -81,6 +83,19 @@ const universeOptions = computed(() =>
 )
 
 const activeTurnList = computed(() => activeSession.value?.recent_turns ?? [])
+const hasPendingConfirmation = computed(() => activeSession.value?.latest_requires_confirmation ?? false)
+const finalStorySummary = computed(() => {
+  const value = activeSession.value?.story_state?.final_summary
+  return typeof value === 'string' ? value : null
+})
+const turnSuccessRateLabel = computed(() => {
+  if (!voiceAnalytics.value) return '0%'
+  return `${Math.round(voiceAnalytics.value.turn_success_rate * 100)}%`
+})
+const finalizeConversionRateLabel = computed(() => {
+  if (!voiceAnalytics.value) return '0%'
+  return `${Math.round(voiceAnalytics.value.finalize_conversion_rate * 100)}%`
+})
 const transcriptionModeDescription = computed(() => {
   switch (activeSession.value?.transcription_mode_hint) {
     case 'openai':
@@ -227,6 +242,15 @@ async function loadSessions() {
   }
 }
 
+// Best-effort refresh of the 30-day analytics summary; does nothing without userStore.user.
+async function loadVoiceAnalytics() {
+  if (userStore.user) {
+    try {
+      voiceAnalytics.value = await api.get<VoiceSessionAnalytics>('/api/voice-sessions/analytics?days=30')
+    } catch { /* Ignore analytics failures so the main editor stays usable. */ }
+  }
+}
+
 async function loadLatestActiveSession() {
   if (!userStore.user) return
   try {
@@ -306,6 +330,7 @@ async function refreshAfterTurn(sessionId: string, turnId: string) {
   await pollTurnResult(sessionId, turnId)
   await loadSessionDetail(sessionId)
   await loadSessions()
+  await loadVoiceAnalytics()
 }
 
 async function submitTextTurn() {
@@ -360,7 +385,7 @@ async function finalizeSession() {
   finalizing.value = true
   error.value = ''
   try {
-    const result = await api.post<VoiceSessionFinalizeResponse>(
+    await api.post<VoiceSessionFinalizeResponse>(
       `/api/voice-sessions/${activeSession.value.id}/finalize`,
       {
         save_story: true,
@@ -370,9 +395,7 @@ async function finalizeSession() {
     )
     await loadSessions()
     await loadSessionDetail(activeSession.value.id)
-    if (result.story_id) {
-      router.push(`/story/${result.story_id}`)
-    }
+    await loadVoiceAnalytics()
   } catch (err) {
     error.value = err instanceof Error ? err.message : '保存语音共创故事失败'
   } finally {
@@ -406,6 +429,7 @@ async function retryAssistantAudio(turnId: string) {
     )
     await loadSessionDetail(activeSession.value.id)
     await loadSessions()
+    await loadVoiceAnalytics()
   } catch (err) {
     error.value = err instanceof Error ? err.message : '补发语音失败'
   } finally {
@@ -413,6 +437,46 @@ async function retryAssistantAudio(turnId: string) {
   }
 }
 
+/**
+ * Submit the user's decision for a turn through the turn's confirm endpoint.
+ *
+ * @param turn   Turn whose id is sent to the confirm endpoint.
+ * @param action 'accept' only posts the decision; 'switch_to_text' and 'retry_recording'
+ *               additionally copy the turn's transcript into the text input / upload hint
+ *               and discard any recorded audio.
+ */
+async function resolveTurnConfirmation(turn: VoiceTurnSummary, action: 'accept' | 'retry_recording' | 'switch_to_text') {
+  // Capture the id once: activeSession may be cleared or replaced while requests are in flight.
+  const sessionId = activeSession.value?.id
+  if (!sessionId) return
+  sendingTurn.value = true
+  error.value = ''
+  try {
+    await api.post<VoiceTurnSummary>(
+      `/api/voice-sessions/${sessionId}/turns/${turn.id}/confirm`,
+      { action },
+    )
+    if (action === 'switch_to_text') {
+      textTurnInput.value = turn.user_transcript || ''
+      clearRecordedAudio()
+    }
+    if (action === 'retry_recording') {
+      uploadTranscriptHint.value = turn.user_transcript || ''
+      clearRecordedAudio()
+    }
+    // Reload the detail only if this session is still the active one; the list and
+    // analytics refresh below no longer depend on activeSession being set.
+    if (activeSession.value?.id === sessionId) {
+      await loadSessionDetail(sessionId)
+    }
+    await loadSessions()
+    await loadVoiceAnalytics()
+  } catch (err) {
+    error.value = err instanceof Error ? err.message : '确认当前理解失败'
+  } finally {
+    sendingTurn.value = false
+  }
+}
+
 async function abandonSession() {
   if (!activeSession.value) return
   abandoning.value = true
@@ -489,6 +540,13 @@ function resetRecording() {
   clearRecordedAudio()
 }
 
+/** Navigate to the story page for the active session's final_story_id; no-op when there is none. */
+function viewFinalStory() {
+  const storyId = activeSession.value?.final_story_id
+  if (!storyId) return
+  router.push(`/story/${storyId}`)
+}
+
 watch(selectedProfileId, (newId) => {
   if (newId) {
     void fetchUniverses(newId)
@@ -522,6 +578,7 @@ onMounted(async () => {
   await fetchProfiles()
   await loadLatestActiveSession()
   await loadSessions()
+  await loadVoiceAnalytics()
 })
 
 onBeforeUnmount(() => {
@@ -672,6 +729,38 @@ onBeforeUnmount(() => {
             </div>
           </BaseCard>
 
+          <!-- Analytics overview card; rendered only while voiceAnalytics holds a snapshot. -->
+          <BaseCard v-if="voiceAnalytics" class="border border-slate-100 bg-white/90">
+            <div class="flex items-center justify-between">
+              <div>
+                <h2 class="text-lg font-semibold text-gray-900">语音共创观测</h2>
+                <p class="mt-1 text-sm text-gray-500">最近 {{ voiceAnalytics.window_days ?? 30 }} 天的会话质量概览。</p>
+              </div>
+            </div>
+            <div class="mt-4 grid grid-cols-2 gap-3 xl:grid-cols-4">
+              <div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
+                <div class="text-xs text-gray-500">Turn 成功率</div>
+                <div class="mt-1 text-lg font-semibold text-gray-900">{{ turnSuccessRateLabel }}</div>
+              </div>
+              <div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
+                <div class="text-xs text-gray-500">低置信度触发</div>
+                <div class="mt-1 text-lg font-semibold text-amber-700">{{ voiceAnalytics.low_confidence_turns }}</div>
+              </div>
+              <div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
+                <div class="text-xs text-gray-500">安全介入</div>
+                <div class="mt-1 text-lg font-semibold text-rose-700">{{ voiceAnalytics.safety_interventions }}</div>
+              </div>
+              <div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
+                <div class="text-xs text-gray-500">Finalize 转化率</div>
+                <div class="mt-1 text-lg font-semibold text-emerald-700">{{ finalizeConversionRateLabel }}</div>
+              </div>
+            </div>
+            <p class="mt-4 text-sm text-gray-500">
+              ASR 失败 {{ voiceAnalytics.asr_failures }} 次，TTS 失败 {{ voiceAnalytics.tts_failures }} 次；
+              当前共有 {{ voiceAnalytics.total_sessions }} 个会话，已完成 {{ voiceAnalytics.finalized_sessions }} 个。
+            </p>
+          </BaseCard>
+
           <div v-if="loadingSessionDetail" class="py-16">
             <LoadingSpinner text="正在加载会话详情..." />
           </div>
@@ -691,6 +779,9 @@ onBeforeUnmount(() => {
                   最近意图：{{ formatIntent(activeSession.latest_detected_intent) }} ·
                   已完成 {{ activeSession.total_turns }} 轮
                 </p>
+                <p v-if="activeSession.final_story_id" class="mt-2 text-sm text-emerald-700">
+                  已沉淀为正式故事 #{{ activeSession.final_story_id }}
+                </p>
               </div>
 
               <div
@@ -720,6 +811,14 @@ onBeforeUnmount(() => {
                   <XMarkIcon class="h-5 w-5" />
                   放弃会话
                 </BaseButton>
+                <BaseButton
+                  v-if="activeSession.final_story_id"
+                  variant="ghost"
+                  @click="viewFinalStory"
+                >
+                  <BookOpenIcon class="h-5 w-5" />
+                  查看正式故事
+                </BaseButton>
               </div>
             </div>
 
@@ -738,6 +837,38 @@ onBeforeUnmount(() => {
                   </p>
                 </div>
 
+                <!-- Session-level safety notice; shown when the session has a latest_safety_message. -->
+                <div
+                  v-if="activeSession.latest_safety_message"
+                  class="rounded-2xl border border-rose-200 bg-rose-50 p-4 text-rose-700"
+                >
+                  <div class="text-sm font-semibold">已触发儿童内容安全兜底</div>
+                  <p class="mt-2 text-sm">{{ activeSession.latest_safety_message }}</p>
+                  <p v-if="activeSession.latest_safety_flags.length" class="mt-2 text-xs text-rose-600">
+                    安全标记：{{ activeSession.latest_safety_flags.join(' / ') }}
+                  </p>
+                </div>
+
+                <!-- Final-story card; shown once the session has a final_story_id. -->
+                <div
+                  v-if="activeSession.final_story_id"
+                  class="rounded-2xl border border-emerald-200 bg-emerald-50 p-4 text-emerald-800"
+                >
+                  <div class="text-sm font-semibold">正式故事已生成</div>
+                  <p class="mt-2 text-sm">
+                    当前语音共创已经沉淀为正式故事《{{ activeSession.working_title || '未命名故事' }}》。
+                  </p>
+                  <p v-if="finalStorySummary" class="mt-2 text-sm text-emerald-700">
+                    摘要：{{ finalStorySummary }}
+                  </p>
+                  <div class="mt-3">
+                    <BaseButton size="sm" variant="secondary" @click="viewFinalStory">
+                      <BookOpenIcon class="h-4 w-4" />
+                      打开正式故事
+                    </BaseButton>
+                  </div>
+                </div>
+
                 <div class="rounded-2xl border border-gray-100 bg-white p-4">
                   <div class="flex items-center justify-between">
                     <h3 class="font-semibold text-gray-900">文本共创回合</h3>
@@ -750,12 +879,12 @@ onBeforeUnmount(() => {
                       placeholder="例如：不要让它害怕，我想让它遇见一个新朋友。"
                       :rows="4"
                       :max-length="1000"
-                      :disabled="sendingTurn || !activeSession.can_continue"
+                      :disabled="sendingTurn || !activeSession.can_continue || hasPendingConfirmation"
                     />
                     <BaseButton
                       @click="submitTextTurn"
                       :loading="sendingTurn"
-                      :disabled="!activeSession.can_continue || !textTurnInput.trim()"
+                      :disabled="!activeSession.can_continue || !textTurnInput.trim() || hasPendingConfirmation"
                     >
                       <PaperAirplaneIcon class="h-5 w-5" />
                       发送文本回合
@@ -777,7 +906,7 @@ onBeforeUnmount(() => {
                       v-if="!recording"
                       variant="secondary"
                       @click="startRecording"
-                      :disabled="sendingTurn || !activeSession.can_continue"
+                      :disabled="sendingTurn || !activeSession.can_continue || hasPendingConfirmation"
                     >
                       <MicrophoneIcon class="h-5 w-5" />
                       开始录音
@@ -827,7 +956,7 @@ onBeforeUnmount(() => {
                     <BaseButton
                       @click="submitRecordedTurn"
                       :loading="sendingTurn"
-                      :disabled="!activeSession.can_continue || !recordedBlob"
+                      :disabled="!activeSession.can_continue || !recordedBlob || hasPendingConfirmation"
                     >
                       <SparklesIcon class="h-5 w-5" />
                       上传录音回合
@@ -874,6 +1003,44 @@ onBeforeUnmount(() => {
                           转写置信度：{{ formatConfidence(turn.transcript_confidence) }} ·
                           意图置信度：{{ formatConfidence(turn.intent_confidence) }}
                         </p>
+                        <!-- Confirmation actions; each button passes its action to resolveTurnConfirmation. -->
+                        <div class="mt-3 flex flex-wrap gap-2">
+                          <BaseButton
+                            size="sm"
+                            variant="secondary"
+                            @click="resolveTurnConfirmation(turn, 'accept')"
+                            :disabled="sendingTurn"
+                          >
+                            按这个理解继续
+                          </BaseButton>
+                          <BaseButton
+                            size="sm"
+                            variant="ghost"
+                            @click="resolveTurnConfirmation(turn, 'retry_recording')"
+                            :disabled="sendingTurn"
+                          >
+                            不对，重说一遍
+                          </BaseButton>
+                          <BaseButton
+                            size="sm"
+                            variant="ghost"
+                            @click="resolveTurnConfirmation(turn, 'switch_to_text')"
+                            :disabled="sendingTurn"
+                          >
+                            改成文本输入
+                          </BaseButton>
+                        </div>
+                      </div>
+                      <!-- Per-turn safety notice; shown when the turn has a safety_message. -->
+                      <div
+                        v-if="turn.safety_message"
+                        class="mt-3 rounded-2xl border border-rose-200 bg-rose-50 px-3 py-3 text-sm text-rose-700"
+                      >
+                        <div class="font-medium">儿童内容安全已介入</div>
+                        <p class="mt-1">{{ turn.safety_message }}</p>
+                        <p v-if="turn.safety_flags.length" class="mt-2 text-xs text-rose-600">
+                          安全标记：{{ turn.safety_flags.join(' / ') }}
+                        </p>
                       </div>
                       <div v-if="turn.user_audio_url" class="mt-3">
                         <audio class="w-full" :src="turn.user_audio_url" controls></audio>
@@ -891,7 +1056,7 @@ onBeforeUnmount(() => {
                           size="sm"
                           variant="secondary"
                           @click="retryFailedTurn(turn.id)"
-                          :disabled="sendingTurn || !activeSession?.can_continue"
+                          :disabled="sendingTurn || !activeSession?.can_continue || hasPendingConfirmation"
                         >
                           <ArrowPathIcon class="h-4 w-4" />
                           重试本轮