From 55ca0985eb3416791692966c4639ee95c95df077 Mon Sep 17 00:00:00 2001 From: Yuyan Date: Sun, 26 Apr 2026 22:00:34 +0800 Subject: [PATCH] Add voice analytics filters and metrics --- backend/app/api/admin_providers.py | 6 +- backend/app/api/voice_sessions.py | 12 +- backend/app/core/celery_app.py | 8 + backend/app/core/config.py | 5 +- backend/app/db/admin_models.py | 24 +-- backend/app/db/database.py | 21 ++- backend/app/schemas/voice_session_schemas.py | 22 +++ backend/app/services/adapters/__init__.py | 10 +- backend/app/services/voice_session_service.py | 108 ++++++++++++ backend/app/tasks/achievements.py | 3 +- backend/app/tasks/audio_cache.py | 3 +- backend/app/tasks/generation_maintenance.py | 3 +- backend/app/tasks/generation_workflow.py | 3 +- backend/app/tasks/memory.py | 9 +- backend/app/tasks/push_notifications.py | 3 +- backend/app/tasks/utils.py | 17 ++ backend/tests/test_admin_providers.py | 3 + backend/tests/test_voice_sessions.py | 44 +++++ docs/planning/demo-checklist.md | 2 + docs/planning/demo-validation-log.md | 155 ++++++++++++++++++ .../voice-co-creation-mode-incremental-prd.md | 100 +++++++++++ .../voice-co-creation-phase-a-tech-spec.md | 2 +- frontend/src/types/voiceSession.ts | 22 +++ frontend/src/views/VoiceStudio.vue | 139 +++++++++++++++- scripts/demo_smoke.sh | 25 ++- 25 files changed, 710 insertions(+), 39 deletions(-) create mode 100644 backend/app/tasks/utils.py diff --git a/backend/app/api/admin_providers.py b/backend/app/api/admin_providers.py index 2511ef5..58e0315 100644 --- a/backend/app/api/admin_providers.py +++ b/backend/app/api/admin_providers.py @@ -1,3 +1,5 @@ +from typing import Literal + from fastapi import APIRouter, Depends, HTTPException, Query from pydantic import BaseModel, ConfigDict, Field from sqlalchemy import select @@ -120,7 +122,9 @@ async def list_provider_capabilities(): @router.get("/providers/analytics", response_model=ProviderAnalyticsResponse) async def get_provider_analytics( days: int | None = 
Query(default=None, ge=1, le=365), - capability: str | None = Query(default=None), + capability: Literal["text", "image", "tts", "storybook", "asr"] | None = Query( + default=None + ), db: AsyncSession = Depends(get_db), ): """获取当前环境跨用户的 Provider 运营摘要。""" diff --git a/backend/app/api/voice_sessions.py b/backend/app/api/voice_sessions.py index ae29839..14fad7a 100644 --- a/backend/app/api/voice_sessions.py +++ b/backend/app/api/voice_sessions.py @@ -116,11 +116,21 @@ async def get_latest_active_voice_session( @router.get("/voice-sessions/analytics", response_model=VoiceSessionAnalyticsResponse) async def get_voice_session_analytics( days: int | None = Query(default=30, ge=1, le=365), + provider: str | None = Query(default=None, min_length=1, max_length=64), + session_status: ( + Literal["draft", "active", "waiting_user", "completed", "abandoned"] | None + ) = Query(default=None), user: User = Depends(require_user), db: AsyncSession = Depends(get_db), ): """Get aggregate voice co-creation analytics for the current user.""" - return await get_voice_session_analytics_service(user.id, db, days=days) + return await get_voice_session_analytics_service( + user.id, + db, + days=days, + provider=provider, + session_status=session_status, + ) @router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse) diff --git a/backend/app/core/celery_app.py b/backend/app/core/celery_app.py index fedced3..8d96eb0 100644 --- a/backend/app/core/celery_app.py +++ b/backend/app/core/celery_app.py @@ -34,6 +34,14 @@ else: ) celery_app.conf.update( + imports=( + "app.tasks.achievements", + "app.tasks.audio_cache", + "app.tasks.generation_maintenance", + "app.tasks.generation_workflow", + "app.tasks.memory", + "app.tasks.push_notifications", + ), task_track_started=True, task_serializer="json", accept_content=["json"], diff --git a/backend/app/core/config.py b/backend/app/core/config.py index f5ca937..fd4e215 100644 --- a/backend/app/core/config.py +++ 
b/backend/app/core/config.py @@ -73,7 +73,10 @@ class Settings(BaseSettings): ) voice_transcription_mode: str = Field( "provider", - description="Voice transcription mode: provider or disabled; provider order is controlled by ASR_PROVIDERS", + description=( + "Voice transcription mode: provider or disabled; provider order is " + "controlled by ASR_PROVIDERS" + ), ) voice_transcription_model: str = Field( "gpt-4o-mini-transcribe", diff --git a/backend/app/db/admin_models.py b/backend/app/db/admin_models.py index 169291b..9e7df85 100644 --- a/backend/app/db/admin_models.py +++ b/backend/app/db/admin_models.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone from decimal import Decimal from uuid import uuid4 @@ -12,6 +12,10 @@ def _uuid() -> str: return str(uuid4()) +def _utcnow() -> datetime: + return datetime.now(timezone.utc) + + class Provider(Base): """Model provider registry.""" @@ -34,9 +38,9 @@ class Provider(Base): nullable=True, ) # 存储额外配置(speed, vol, etc) config_ref: Mapped[str] = mapped_column(String(100), nullable=True) # 环境变量 key 名称(回退) - created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=datetime.utcnow) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow) updated_at: Mapped[datetime] = mapped_column( - DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow + DateTime(timezone=True), default=_utcnow, onupdate=_utcnow ) updated_by: Mapped[str] = mapped_column(String(100), nullable=True) @@ -51,7 +55,7 @@ class ProviderMetrics(Base): String(36), ForeignKey("providers.id", ondelete="CASCADE"), nullable=False, index=True ) timestamp: Mapped[datetime] = mapped_column( - DateTime(timezone=True), default=datetime.utcnow, index=True + DateTime(timezone=True), default=_utcnow, index=True ) success: Mapped[bool] = mapped_column(Boolean, nullable=False) latency_ms: Mapped[int] = mapped_column(Integer, nullable=True) @@ -82,9 +86,9 @@ class 
ProviderSecret(Base): id: Mapped[str] = mapped_column(String(36), primary_key=True, default=_uuid) name: Mapped[str] = mapped_column(String(100), unique=True, nullable=False) encrypted_value: Mapped[str] = mapped_column(Text, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=datetime.utcnow) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow) updated_at: Mapped[datetime] = mapped_column( - DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow + DateTime(timezone=True), default=_utcnow, onupdate=_utcnow ) @@ -97,10 +101,10 @@ class CostRecord(Base): user_id: Mapped[str] = mapped_column(String(36), nullable=False, index=True) provider_id: Mapped[str] = mapped_column(String(36), nullable=True) # 可能是环境变量配置 provider_name: Mapped[str] = mapped_column(String(100), nullable=False) - capability: Mapped[str] = mapped_column(String(50), nullable=False) # text/image/tts/storybook/asr + capability: Mapped[str] = mapped_column(String(50), nullable=False) estimated_cost: Mapped[Decimal] = mapped_column(Numeric(10, 6), nullable=False) timestamp: Mapped[datetime] = mapped_column( - DateTime(timezone=True), default=datetime.utcnow, index=True + DateTime(timezone=True), default=_utcnow, index=True ) @@ -116,7 +120,7 @@ class UserBudget(Base): Numeric(3, 2), default=Decimal("0.8") ) # 80% 时告警 enabled: Mapped[bool] = mapped_column(Boolean, default=True) - created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=datetime.utcnow) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow) updated_at: Mapped[datetime] = mapped_column( - DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow + DateTime(timezone=True), default=_utcnow, onupdate=_utcnow ) diff --git a/backend/app/db/database.py b/backend/app/db/database.py index 32bad04..bb6660e 100644 --- a/backend/app/db/database.py +++ b/backend/app/db/database.py @@ 
-6,7 +6,7 @@ from app.core.config import settings _engine = None _session_factory: async_sessionmaker[AsyncSession] | None = None -_lock = threading.Lock() +_lock = threading.RLock() def _get_engine(): @@ -34,6 +34,25 @@ def _get_session_factory(): return _session_factory +async def dispose_engine(): + """Dispose the async engine and reset cached DB handles. + + Celery tasks run async code through ``asyncio.run()``, which creates and closes + one event loop per task. Asyncpg connections are bound to the loop that created + them, so worker tasks must not keep pooled connections across task runs. + """ + global _engine, _session_factory + + engine = _engine + if engine is not None: + await engine.dispose() + + with _lock: + if _engine is engine: + _engine = None + _session_factory = None + + async def init_db(): """Create tables if they do not exist.""" from app.db.models import Base # main models diff --git a/backend/app/schemas/voice_session_schemas.py b/backend/app/schemas/voice_session_schemas.py index 110a657..79505c6 100644 --- a/backend/app/schemas/voice_session_schemas.py +++ b/backend/app/schemas/voice_session_schemas.py @@ -77,6 +77,7 @@ class VoiceTurnSummaryResponse(BaseModel): user_transcript: str | None = None transcript_confidence: float | None = None transcription_provider: str | None = None + user_audio_duration_ms: int | None = None detected_intent: str intent_confidence: float | None = None understanding_summary: str | None = None @@ -88,6 +89,7 @@ class VoiceTurnSummaryResponse(BaseModel): safety_blocked: bool = False safety_message: str | None = None assistant_text: str | None = None + assistant_audio_duration_ms: int | None = None assistant_audio_ready: bool = False assistant_audio_url: str | None = None user_audio_ready: bool = False @@ -149,6 +151,8 @@ class VoiceSessionAnalyticsResponse(BaseModel): """Aggregated voice co-creation analytics for one user.""" window_days: int | None = None + provider: str | None = None + session_status: str | 
None = None total_sessions: int = 0 attention_sessions: int = 0 confirmation_attention_sessions: int = 0 @@ -164,6 +168,24 @@ class VoiceSessionAnalyticsResponse(BaseModel): tts_failures: int = 0 low_confidence_turns: int = 0 safety_interventions: int = 0 + text_fallback_turns: int = 0 + uploaded_audio_turns: int = 0 + user_audio_turn_rate: float = 0.0 + assistant_audio_ready_turns: int = 0 + assistant_audio_ready_rate: float = 0.0 + asr_success_rate: float = 0.0 + tts_success_rate: float = 0.0 + avg_transcript_confidence: float = 0.0 + avg_intent_confidence: float = 0.0 + safety_intervention_rate: float = 0.0 + failure_event_counts: dict[str, int] = Field(default_factory=dict) + total_user_audio_duration_ms: int = 0 + avg_user_audio_duration_ms: float = 0.0 + total_assistant_audio_turns: int = 0 + total_assistant_audio_duration_ms: int = 0 + avg_assistant_audio_duration_ms: float = 0.0 + transcription_provider_counts: dict[str, int] = Field(default_factory=dict) + confirmation_request_rate: float = 0.0 turn_success_rate: float = 0.0 finalize_conversion_rate: float = 0.0 diff --git a/backend/app/services/adapters/__init__.py b/backend/app/services/adapters/__init__.py index 0ed81dd..c3b48cc 100644 --- a/backend/app/services/adapters/__init__.py +++ b/backend/app/services/adapters/__init__.py @@ -2,13 +2,13 @@ # Demo adapters from app.services.adapters import demo as _demo_adapters # noqa: F401 + +# ASR adapters +from app.services.adapters.asr import demo as _asr_demo_adapter # noqa: F401 +from app.services.adapters.asr import openai as _asr_openai_adapter # noqa: F401 from app.services.adapters.base import AdapterConfig, BaseAdapter -# ASR adapters -from app.services.adapters.asr import demo as _asr_demo_adapter # noqa: F401 -from app.services.adapters.asr import openai as _asr_openai_adapter # noqa: F401 - -# Image adapters +# Image adapters from app.services.adapters.image import cqtai as _image_cqtai_adapter # noqa: F401 from app.services.adapters.registry 
import AdapterRegistry diff --git a/backend/app/services/voice_session_service.py b/backend/app/services/voice_session_service.py index aa78af1..0dce7ba 100644 --- a/backend/app/services/voice_session_service.py +++ b/backend/app/services/voice_session_service.py @@ -335,6 +335,7 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse: user_transcript=turn.user_transcript, transcript_confidence=turn.transcript_confidence, transcription_provider=turn_patch.get("transcription_provider"), + user_audio_duration_ms=turn.user_audio_duration_ms, detected_intent=turn.detected_intent, intent_confidence=turn.intent_confidence, understanding_summary=confirmation_state["understanding_summary"], @@ -346,6 +347,7 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse: safety_blocked=safety_state["safety_blocked"], safety_message=safety_state["safety_message"], assistant_text=turn.assistant_text, + assistant_audio_duration_ms=turn.assistant_audio_duration_ms, assistant_audio_ready=session_audio_exists(turn.assistant_audio_path), assistant_audio_url=_assistant_audio_url( turn.session_id, @@ -1194,10 +1196,14 @@ async def get_voice_session_analytics_service( db: AsyncSession, *, days: int | None = 30, + provider: str | None = None, + session_status: str | None = None, ) -> VoiceSessionAnalyticsResponse: cutoff = None if days is not None: cutoff = datetime.now(timezone.utc) - timedelta(days=days) + provider_filter = (provider or "").strip() or None + session_status_filter = (session_status or "").strip() or None session_query = select(VoiceSession).where(VoiceSession.user_id == user_id) turn_query = ( @@ -1215,10 +1221,30 @@ async def get_voice_session_analytics_service( session_query = session_query.where(VoiceSession.created_at >= cutoff) turn_query = turn_query.where(VoiceTurn.created_at >= cutoff) event_query = event_query.where(VoiceSessionEvent.created_at >= cutoff) + if session_status_filter is not None: + session_query = 
session_query.where(VoiceSession.status == session_status_filter) + turn_query = turn_query.where(VoiceSession.status == session_status_filter) + event_query = event_query.where(VoiceSession.status == session_status_filter) sessions = (await db.execute(session_query)).scalars().all() turns = (await db.execute(turn_query)).scalars().all() events = (await db.execute(event_query)).scalars().all() + if provider_filter is not None: + provider_turn_ids = { + turn.id + for turn in turns + if ((turn.story_patch or {}).get("transcription_provider") or "unknown") + == provider_filter + } + provider_session_ids = {turn.session_id for turn in turns if turn.id in provider_turn_ids} + sessions = [session for session in sessions if session.id in provider_session_ids] + turns = [turn for turn in turns if turn.id in provider_turn_ids] + events = [ + event + for event in events + if event.turn_id in provider_turn_ids + or (event.turn_id is None and event.session_id in provider_session_ids) + ] session_summaries = [await _build_session_summary(db, session) for session in sessions] total_sessions = len(sessions) @@ -1258,6 +1284,36 @@ async def get_voice_session_analytics_service( safety_interventions = sum( 1 for event in events if event.event_type == "safety_intervention_requested" ) + text_fallback_turns = sum( + 1 for turn in turns if (turn.story_patch or {}).get("transcription_provider") == "fallback" + ) + uploaded_audio_turns = sum(1 for turn in turns if turn.user_audio_path) + assistant_audio_ready_turns = sum( + 1 for turn in turns if session_audio_exists(turn.assistant_audio_path) + ) + user_audio_durations = [ + duration for turn in turns if (duration := turn.user_audio_duration_ms) is not None + ] + assistant_audio_durations = [ + duration for turn in turns if (duration := turn.assistant_audio_duration_ms) is not None + ] + total_user_audio_duration_ms = sum(user_audio_durations) + total_assistant_audio_duration_ms = sum(assistant_audio_durations) + 
transcription_provider_counts: dict[str, int] = {} + for turn in turns: + provider = (turn.story_patch or {}).get("transcription_provider") or "unknown" + transcription_provider_counts[provider] = transcription_provider_counts.get(provider, 0) + 1 + failure_event_counts: dict[str, int] = {} + for event in events: + if event.status != "failed": + continue + failure_event_counts[event.event_type] = failure_event_counts.get(event.event_type, 0) + 1 + transcript_confidences = [ + confidence for turn in turns if (confidence := turn.transcript_confidence) is not None + ] + intent_confidences = [ + confidence for turn in turns if (confidence := turn.intent_confidence) is not None + ] turn_success_rate = ( round(successful_turns / total_turns, 4) if total_turns else 0.0 @@ -1265,9 +1321,27 @@ async def get_voice_session_analytics_service( finalize_conversion_rate = ( round(finalized_sessions / total_sessions, 4) if total_sessions else 0.0 ) + confirmation_request_rate = ( + round(low_confidence_turns / total_turns, 4) if total_turns else 0.0 + ) + user_audio_turn_rate = round(uploaded_audio_turns / total_turns, 4) if total_turns else 0.0 + assistant_audio_ready_rate = ( + round(assistant_audio_ready_turns / successful_turns, 4) if successful_turns else 0.0 + ) + asr_attempts = uploaded_audio_turns + asr_failures + asr_success_rate = round(uploaded_audio_turns / asr_attempts, 4) if asr_attempts else 0.0 + tts_attempts = assistant_audio_ready_turns + tts_failures + tts_success_rate = ( + round(assistant_audio_ready_turns / tts_attempts, 4) if tts_attempts else 0.0 + ) + safety_intervention_rate = ( + round(safety_interventions / total_turns, 4) if total_turns else 0.0 + ) return VoiceSessionAnalyticsResponse( window_days=days, + provider=provider_filter, + session_status=session_status_filter, total_sessions=total_sessions, attention_sessions=attention_sessions, confirmation_attention_sessions=confirmation_attention_sessions, @@ -1283,6 +1357,40 @@ async def 
get_voice_session_analytics_service( tts_failures=tts_failures, low_confidence_turns=low_confidence_turns, safety_interventions=safety_interventions, + text_fallback_turns=text_fallback_turns, + uploaded_audio_turns=uploaded_audio_turns, + user_audio_turn_rate=user_audio_turn_rate, + assistant_audio_ready_turns=assistant_audio_ready_turns, + assistant_audio_ready_rate=assistant_audio_ready_rate, + asr_success_rate=asr_success_rate, + tts_success_rate=tts_success_rate, + avg_transcript_confidence=( + round(sum(transcript_confidences) / len(transcript_confidences), 4) + if transcript_confidences + else 0.0 + ), + avg_intent_confidence=( + round(sum(intent_confidences) / len(intent_confidences), 4) + if intent_confidences + else 0.0 + ), + safety_intervention_rate=safety_intervention_rate, + failure_event_counts=failure_event_counts, + total_user_audio_duration_ms=total_user_audio_duration_ms, + avg_user_audio_duration_ms=( + round(total_user_audio_duration_ms / len(user_audio_durations), 2) + if user_audio_durations + else 0.0 + ), + total_assistant_audio_turns=len(assistant_audio_durations), + total_assistant_audio_duration_ms=total_assistant_audio_duration_ms, + avg_assistant_audio_duration_ms=( + round(total_assistant_audio_duration_ms / len(assistant_audio_durations), 2) + if assistant_audio_durations + else 0.0 + ), + transcription_provider_counts=transcription_provider_counts, + confirmation_request_rate=confirmation_request_rate, turn_success_rate=turn_success_rate, finalize_conversion_rate=finalize_conversion_rate, ) diff --git a/backend/app/tasks/achievements.py b/backend/app/tasks/achievements.py index 06b071e..ec7d2f0 100644 --- a/backend/app/tasks/achievements.py +++ b/backend/app/tasks/achievements.py @@ -10,6 +10,7 @@ from app.core.logging import get_logger from app.db.database import _get_session_factory from app.db.models import Story, StoryUniverse from app.services.achievement_extractor import extract_achievements +from app.tasks.utils import 
run_with_disposed_engine logger = get_logger(__name__) @@ -17,7 +18,7 @@ logger = get_logger(__name__) @celery_app.task def extract_story_achievements(story_id: int, universe_id: str) -> None: """Extract achievements and update universe.""" - asyncio.run(_extract_story_achievements(story_id, universe_id)) + asyncio.run(run_with_disposed_engine(_extract_story_achievements(story_id, universe_id))) async def _extract_story_achievements(story_id: int, universe_id: str) -> None: diff --git a/backend/app/tasks/audio_cache.py b/backend/app/tasks/audio_cache.py index 73fb084..0f6737b 100644 --- a/backend/app/tasks/audio_cache.py +++ b/backend/app/tasks/audio_cache.py @@ -6,6 +6,7 @@ from app.core.celery_app import celery_app from app.core.logging import get_logger from app.db.database import _get_session_factory from app.services.story_service import prune_story_audio_cache +from app.tasks.utils import run_with_disposed_engine logger = get_logger(__name__) @@ -21,7 +22,7 @@ def prune_story_audio_cache_task(): return await prune_story_audio_cache(session) try: - result = asyncio.run(_run()) + result = asyncio.run(run_with_disposed_engine(_run())) logger.info("prune_story_audio_cache_task_completed", **result) return result except Exception as exc: diff --git a/backend/app/tasks/generation_maintenance.py b/backend/app/tasks/generation_maintenance.py index fa82396..63db029 100644 --- a/backend/app/tasks/generation_maintenance.py +++ b/backend/app/tasks/generation_maintenance.py @@ -6,6 +6,7 @@ from app.core.celery_app import celery_app from app.core.logging import get_logger from app.db.database import _get_session_factory from app.services.generation_jobs import mark_stale_generation_jobs +from app.tasks.utils import run_with_disposed_engine logger = get_logger(__name__) @@ -22,7 +23,7 @@ def prune_stale_generation_jobs_task(): return await mark_stale_generation_jobs(session) try: - result = asyncio.run(_run()) + result = asyncio.run(run_with_disposed_engine(_run())) 
logger.info("prune_stale_generation_jobs_task_completed", **result) return result except Exception as exc: diff --git a/backend/app/tasks/generation_workflow.py b/backend/app/tasks/generation_workflow.py index 38105c6..263120d 100644 --- a/backend/app/tasks/generation_workflow.py +++ b/backend/app/tasks/generation_workflow.py @@ -6,6 +6,7 @@ from app.core.celery_app import celery_app from app.core.logging import get_logger from app.db.database import _get_session_factory from app.services.story_service import run_generation_job_service +from app.tasks.utils import run_with_disposed_engine logger = get_logger(__name__) @@ -22,7 +23,7 @@ def run_generation_workflow_task(job_id: str): return await run_generation_job_service(job_id, session) try: - result = asyncio.run(_run()) + result = asyncio.run(run_with_disposed_engine(_run())) logger.info( "generation_workflow_task_completed", job_id=job_id, diff --git a/backend/app/tasks/memory.py b/backend/app/tasks/memory.py index 1b70d92..3fbfffb 100644 --- a/backend/app/tasks/memory.py +++ b/backend/app/tasks/memory.py @@ -2,9 +2,10 @@ import asyncio from app.core.celery_app import celery_app -from app.core.logging import get_logger -from app.db.database import _get_session_factory -from app.services.memory_service import prune_expired_memories +from app.core.logging import get_logger +from app.db.database import _get_session_factory +from app.services.memory_service import prune_expired_memories +from app.tasks.utils import run_with_disposed_engine logger = get_logger(__name__) @@ -21,7 +22,7 @@ def prune_memories_task(): try: # Create a new event loop for this task execution - count = asyncio.run(_run()) + count = asyncio.run(run_with_disposed_engine(_run())) logger.info("prune_memories_task_completed", deleted_count=count) return f"Deleted {count} expired memories" except Exception as exc: diff --git a/backend/app/tasks/push_notifications.py b/backend/app/tasks/push_notifications.py index f7fe943..7f8e61d 100644 --- 
a/backend/app/tasks/push_notifications.py +++ b/backend/app/tasks/push_notifications.py @@ -10,6 +10,7 @@ from app.core.celery_app import celery_app from app.core.logging import get_logger from app.db.database import _get_session_factory from app.db.models import PushConfig, PushEvent +from app.tasks.utils import run_with_disposed_engine logger = get_logger(__name__) @@ -22,7 +23,7 @@ TRIGGER_WINDOW_MINUTES = 30 @celery_app.task def check_push_notifications() -> None: """Check push configs and create push events.""" - asyncio.run(_check_push_notifications()) + asyncio.run(run_with_disposed_engine(_check_push_notifications())) def _is_quiet_hours(current: time) -> bool: diff --git a/backend/app/tasks/utils.py b/backend/app/tasks/utils.py new file mode 100644 index 0000000..0bc35d9 --- /dev/null +++ b/backend/app/tasks/utils.py @@ -0,0 +1,17 @@ +"""Shared helpers for Celery tasks.""" + +from collections.abc import Awaitable +from typing import TypeVar + +from app.db.database import dispose_engine + +T = TypeVar("T") + + +async def run_with_disposed_engine(awaitable: Awaitable[T]) -> T: + """Run async task work and drop DB pools before the event loop closes.""" + + try: + return await awaitable + finally: + await dispose_engine() diff --git a/backend/tests/test_admin_providers.py b/backend/tests/test_admin_providers.py index bcbb755..69bec64 100644 --- a/backend/tests/test_admin_providers.py +++ b/backend/tests/test_admin_providers.py @@ -283,3 +283,6 @@ async def test_admin_provider_analytics_support_days_and_capability_filters( assert data["job_count"] == 1 assert data["story_count"] == 1 assert data["failure_reasons"] == [{"reason": "timeout", "count": 1}] + + response = await client.get("/admin/providers/analytics?capability=unknown") + assert response.status_code == 422 diff --git a/backend/tests/test_voice_sessions.py b/backend/tests/test_voice_sessions.py index e752850..a3a0db9 100644 --- a/backend/tests/test_voice_sessions.py +++ 
b/backend/tests/test_voice_sessions.py @@ -342,6 +342,7 @@ async def test_voice_session_low_confidence_turn_requests_confirmation( files={ "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"), }, + data={"duration_ms": "1200"}, ) assert response.status_code == 202 turn_id = response.json()["turn_id"] @@ -431,6 +432,7 @@ async def test_voice_session_confirmation_accept_continues_original_turn( files={ "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"), }, + data={"duration_ms": "1200"}, ) turn_id = response.json()["turn_id"] @@ -503,6 +505,7 @@ async def test_voice_session_confirmation_switch_to_text_allows_follow_up_turn( files={ "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"), }, + data={"duration_ms": "1200"}, ) turn_id = response.json()["turn_id"] @@ -647,6 +650,7 @@ async def test_voice_session_analytics_summarize_failures_and_confirmations( files={ "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"), }, + data={"duration_ms": "1200"}, ) turn_id = response.json()["turn_id"] await client.post( @@ -677,6 +681,46 @@ async def test_voice_session_analytics_summarize_failures_and_confirmations( assert analytics["asr_failures"] >= 1 assert analytics["finalized_sessions"] >= 1 assert analytics["finalize_conversion_rate"] > 0 + assert analytics["text_fallback_turns"] >= 1 + assert analytics["uploaded_audio_turns"] >= 1 + assert analytics["user_audio_turn_rate"] > 0 + assert analytics["assistant_audio_ready_turns"] >= 1 + assert analytics["assistant_audio_ready_rate"] > 0 + assert analytics["asr_success_rate"] > 0 + assert analytics["tts_success_rate"] > 0 + assert analytics["avg_transcript_confidence"] > 0 + assert analytics["avg_intent_confidence"] > 0 + assert analytics["failure_event_counts"]["turn_transcription_failed"] >= 1 + assert analytics["failure_event_counts"]["assistant_audio_failed"] >= 1 + assert analytics["total_user_audio_duration_ms"] >= 1200 + assert analytics["avg_user_audio_duration_ms"] >= 1200 + assert 
analytics["transcription_provider_counts"]["openai"] >= 1 + assert analytics["transcription_provider_counts"]["fallback"] >= 1 + assert analytics["confirmation_request_rate"] > 0 + + response = await client.get( + "/api/voice-sessions/analytics?days=30&provider=openai" + ) + assert response.status_code == 200 + provider_analytics = response.json() + assert provider_analytics["provider"] == "openai" + assert provider_analytics["uploaded_audio_turns"] >= 1 + assert provider_analytics["text_fallback_turns"] == 0 + assert set(provider_analytics["transcription_provider_counts"]) == {"openai"} + + response = await client.get( + "/api/voice-sessions/analytics?days=30&session_status=completed" + ) + assert response.status_code == 200 + status_analytics = response.json() + assert status_analytics["session_status"] == "completed" + assert status_analytics["total_sessions"] >= 1 + assert status_analytics["finalized_sessions"] >= 1 + + response = await client.get( + "/api/voice-sessions/analytics?days=30&session_status=unknown" + ) + assert response.status_code == 422 finally: app.dependency_overrides.clear() diff --git a/docs/planning/demo-checklist.md b/docs/planning/demo-checklist.md index ae02784..ee63733 100644 --- a/docs/planning/demo-checklist.md +++ b/docs/planning/demo-checklist.md @@ -79,6 +79,8 @@ SMOKE_AUDIO=1 SMOKE_VOICE=1 ./scripts/demo_smoke.sh - [ ] `/api/audio/{story_id}/status` 能查询音频缓存状态且不触发生成 - [ ] 如果启用 `SMOKE_AUDIO=1`,音频 retry 后 `audio_status=ready` - [ ] 如果启用 `SMOKE_VOICE=1`,语音共创会话可完成文本 fallback、上传回合、analytics 和 finalize 到 Story +- [ ] 如果启用 `SMOKE_VOICE=1`,analytics 返回输入构成、语音时长、Provider 分布、ASR/TTS 成功率和低置信度确认率 +- [ ] 如果启用 `SMOKE_VOICE=1`,analytics 支持按 `provider` 和 `session_status` 筛选 - [ ] 验证结果已记录到 `docs/planning/demo-validation-log.md` --- diff --git a/docs/planning/demo-validation-log.md b/docs/planning/demo-validation-log.md index e0acd9a..4a41944 100644 --- a/docs/planning/demo-validation-log.md +++ b/docs/planning/demo-validation-log.md @@ -128,3 
+128,158 @@ SMOKE_AUDIO=1 ./scripts/demo_smoke.sh 限制: - 本机浏览器自动化脚本默认寻找标准版 Chrome;当前电脑安装的是 Google Chrome Beta,所以本轮没有生成 CDP 截图。 + +## 2026-04-24 语音共创 Alpha 观测补强 + +- 今日优先级:先收束 Phase A Alpha 的可解释性,不进入 Phase B 实时化。 +- 后端 `VoiceTurnSummaryResponse` 已返回用户/助手音频时长,便于定位单轮录音质量与 TTS 产物状态。 +- 后端 `VoiceSessionAnalyticsResponse` 已新增用户语音总时长、平均时长、助手音频统计、转写 Provider 分布和低置信度确认率。 +- 用户端 Voice Studio 观测卡片已展示平均用户语音时长、转写来源分布和确认率。 +- `SMOKE_VOICE=1 ./scripts/demo_smoke.sh` 已新增语音时长与转写 Provider 分布断言。 + +验证命令: + +```bash +cd backend && .venv/bin/pytest tests/test_voice_sessions.py -q +cd frontend && npm run build +``` + +结果: + +- `tests/test_voice_sessions.py` 通过,15 passed,保留 1 个 SQLAlchemy/SQLite `datetime.utcnow()` 上游 deprecation warning。 +- 用户端 `vue-tsc && vite build` 通过,保留 `baseline-browser-mapping` 数据偏旧提示。 + +## 2026-04-24 语音共创 Alpha 50 项执行池与 P1 观测扩展 + +- PRD 已新增 Phase A Alpha 50 项执行 Backlog,明确 P0/P1/P2、验收口径和今日执行策略。 +- 后端 voice analytics 已扩展输入构成、上传语音占比、助手语音覆盖率、ASR/TTS 成功率、平均转写/意图置信度、安全介入率和失败事件分布。 +- Voice Studio 已展示上传/文本构成、助手语音覆盖、ASR/TTS 成功率、平均置信度、用户/助手平均语音时长,并在单 turn 卡片展示用户/助手语音时长。 +- `SMOKE_VOICE=1` smoke 已新增输入构成与 ASR/TTS 成功率断言。 +- 技术方案与 demo checklist 已同步语音观测字段。 + +验证命令: + +```bash +cd backend && .venv/bin/pytest tests/test_voice_sessions.py -q +cd backend && .venv/bin/ruff check app/schemas/voice_session_schemas.py app/services/voice_session_service.py tests/test_voice_sessions.py +cd frontend && npm run build +``` + +结果: + +- `tests/test_voice_sessions.py` 通过,15 passed,保留 1 个 SQLAlchemy/SQLite `datetime.utcnow()` 上游 deprecation warning。 +- `ruff check` 通过。 +- 用户端 `vue-tsc && vite build` 通过,保留 `baseline-browser-mapping` 数据偏旧提示。 + +## 2026-04-24 语音共创 P2 样本与列表摘要补充 + +- Voice Studio 最近会话列表已增加轻量状态摘要:待确认、安全介入、最近意图或等待输入。 +- PRD 已补 10 条儿童表达样本和 2 版低置信度确认文案草案,用于后续 Alpha 人工验收。 +- 代码自审结论:本轮没有新增数据库迁移;新增字段均为响应层兼容扩展;前端使用空值兜底;smoke 断言只在 `SMOKE_VOICE=1` 路径生效,不影响默认演示。 + +复验命令: + +```bash +cd frontend && npm run build +cd backend && .venv/bin/pytest tests/test_voice_sessions.py -q +cd backend && 
.venv/bin/ruff check app/schemas/voice_session_schemas.py app/services/voice_session_service.py tests/test_voice_sessions.py +``` + +结果: + +- 用户端 `vue-tsc && vite build` 通过。 +- `tests/test_voice_sessions.py` 通过,15 passed,保留 1 个 SQLAlchemy/SQLite `datetime.utcnow()` 上游 deprecation warning。 +- `ruff check` 通过。 + +## 2026-04-25 语音 Analytics Provider/Status 过滤开发 + +- 后端 `GET /api/voice-sessions/analytics` 新增 `provider` 与 `session_status` 查询参数。 +- analytics 响应新增当前筛选条件回显:`provider`、`session_status`。 +- Voice Studio 观测卡新增转写来源与会话状态筛选控件。 +- `SMOKE_VOICE=1` 已新增 provider/status 过滤断言。 +- 技术方案、demo checklist、PRD 执行状态已同步。 + +验证命令: + +```bash +cd backend && .venv/bin/pytest tests/test_voice_sessions.py -q +cd backend && .venv/bin/ruff check app/api/voice_sessions.py app/schemas/voice_session_schemas.py app/services/voice_session_service.py tests/test_voice_sessions.py +cd frontend && npm run build +``` + +结果: + +- `tests/test_voice_sessions.py` 通过,15 passed,保留 1 个 SQLAlchemy/SQLite `datetime.utcnow()` 上游 deprecation warning。 +- `ruff check` 通过。 +- 用户端 `vue-tsc && vite build` 通过,保留 `baseline-browser-mapping` 数据偏旧提示。 + +## 2026-04-25 Warning 与前端依赖安全收敛 + +- 后端移除 `datetime.utcnow()`:Provider admin models、cost tracker、provider metrics 已改为 timezone-aware UTC 时间。 +- `tests/test_voice_sessions.py` 不再输出 SQLAlchemy/SQLite `datetime.utcnow()` deprecation warning。 +- 前端更新 `baseline-browser-mapping`,`npm run build` 不再输出 Baseline 数据过期提示。 +- 执行非破坏性 `npm audit fix` 后,用户端生产依赖 `npm audit --omit=dev` 为 0 vulnerabilities。 + +验证命令: + +```bash +cd backend && .venv/bin/pytest tests/test_admin_providers.py tests/test_voice_sessions.py -q +cd backend && .venv/bin/ruff check app/db/admin_models.py app/services/cost_tracker.py app/services/provider_metrics.py app/api/voice_sessions.py app/schemas/voice_session_schemas.py app/services/voice_session_service.py tests/test_voice_sessions.py +cd frontend && npm audit --omit=dev +cd frontend && npm run build +``` + +结果: + +- `tests/test_admin_providers.py 
tests/test_voice_sessions.py` 通过,17 passed。 +- `ruff check` 通过。 +- `npm audit --omit=dev` 返回 0 vulnerabilities。 +- 用户端 `vue-tsc && vite build` 通过。 + +## 2026-04-25 行尾噪音收敛与 Admin Analytics 校验 + +- 已撤回高噪音 CRLF / lockfile 变更,当前 diff 保留在语音 analytics、Voice Studio、测试、smoke、文档和低噪音 admin models 修复范围内。 +- 后端 admin provider analytics 的 `capability` 参数已收紧为 `text/image/tts/storybook/asr` 枚举,无效能力返回 `422`。 +- 语音 analytics 的 `session_status` 参数已收紧为明确会话状态枚举,无效状态返回 `422`。 + +验证命令: + +```bash +cd backend && .venv/bin/ruff check app/api/admin_providers.py app/api/voice_sessions.py app/db/admin_models.py app/schemas/voice_session_schemas.py app/services/voice_session_service.py tests/test_admin_providers.py tests/test_voice_sessions.py +cd backend && .venv/bin/pytest tests/test_admin_providers.py tests/test_voice_sessions.py -q +cd frontend && npm run build +``` + +结果: + +- `ruff check` 通过。 +- `tests/test_admin_providers.py tests/test_voice_sessions.py` 通过,17 passed。 +- 用户端 `vue-tsc && vite build` 通过。 + +## 2026-04-25 Docker Voice Smoke 回归闭环 + +- Docker 栈已用当前代码重建:backend、backend-admin、worker、celery-beat、frontend、frontend-admin 均可启动。 +- 修复 Celery task 注册不完整问题:worker 现在注册 generation workflow、generation maintenance、audio cache、memory、push 和 achievements 任务。 +- 修复 worker 冷启动 DB session factory 自锁:数据库锁改为可重入锁。 +- 修复 Celery async task 跨 event loop 复用 asyncpg 连接问题:任务结束时 dispose async engine。 +- `SMOKE_VOICE=1` smoke 对齐当前 intent/event 命名,并使用非空临时 demo audio 上传样本。 + +验证命令: + +```bash +cd backend && .venv/bin/python -m ruff check app/db/database.py app/core/celery_app.py app/tasks +cd backend && .venv/bin/python -m pytest tests/test_admin_providers.py tests/test_voice_sessions.py -q +cd frontend && npm run build +cd admin-frontend && npm run build +docker compose up -d --build +SMOKE_VOICE=1 ./scripts/demo_smoke.sh +``` + +结果: + +- `ruff check` 通过。 +- `tests/test_admin_providers.py tests/test_voice_sessions.py` 通过,17 passed。 +- 用户端 `vue-tsc && vite build` 通过。 +- 管理端 `vue-tsc && vite build` 通过,仍有 
`baseline-browser-mapping` 数据偏旧提示。 +- `docker compose up -d --build` 通过,当前本地服务可访问 `http://localhost:52080` 与 `http://localhost:52888`。 +- `SMOKE_VOICE=1 ./scripts/demo_smoke.sh` 完整通过,覆盖普通故事、语音共创文本 fallback、上传回合、voice analytics、provider/status 筛选、finalize、绘本、资产重试、provider analytics 与 ops summary。 diff --git a/docs/product/voice-co-creation-mode-incremental-prd.md b/docs/product/voice-co-creation-mode-incremental-prd.md index 88fedc1..e0c7343 100644 --- a/docs/product/voice-co-creation-mode-incremental-prd.md +++ b/docs/product/voice-co-creation-mode-incremental-prd.md @@ -556,6 +556,71 @@ DreamWeaver 的语音共创模式应当成为一种“孩子可以开口参与 --- + + +## Phase A Alpha 50-Task Execution Backlog(2026-04-24) + +> 目标:先把语音共创 Alpha 做到“可演示、可解释、可复验”,再进入 Phase B 实时化。以下 50 项按今天可连续推进的优先级排列;实现时优先选择无需新迁移、风险低、能用测试和 smoke 验证的任务。 + +| # | Priority | Area | Task | Acceptance | +| --- | --- | --- | --- | --- | +| 01 | P0 | PRD | 固化 50 项 Alpha 执行池 | PRD 中能看到任务、优先级、验收口径 | +| 02 | P0 | Analytics | turn summary 返回用户录音时长 | `GET /turns/{id}` 有 `user_audio_duration_ms` | +| 03 | P0 | Analytics | turn summary 返回助手音频时长 | `GET /turns/{id}` 有 `assistant_audio_duration_ms` | +| 04 | P0 | Analytics | voice analytics 返回用户语音总时长 | analytics 有 `total_user_audio_duration_ms` | +| 05 | P0 | Analytics | voice analytics 返回用户平均语音时长 | analytics 有 `avg_user_audio_duration_ms` | +| 06 | P0 | Analytics | voice analytics 返回转写 Provider 分布 | analytics 有 `transcription_provider_counts` | +| 07 | P0 | Analytics | voice analytics 返回低置信度确认率 | analytics 有 `confirmation_request_rate` | +| 08 | P0 | Frontend | Voice Studio 展示平均用户语音时长 | 观测卡片可见平均秒数 | +| 09 | P0 | Frontend | Voice Studio 展示转写来源分布 | 观测卡片可见 fallback/demo/openai 次数 | +| 10 | P0 | Frontend | Voice Studio 展示确认率 | 低置信度卡片显示确认率 | +| 11 | P0 | Smoke | `SMOKE_VOICE=1` 断言上传回合时长 | smoke 检查 `user_audio_duration_ms` | +| 12 | P0 | Smoke | `SMOKE_VOICE=1` 断言 Provider 分布 | smoke 检查 demo/fallback 次数 | +| 13 | P0 | Tests | 增加 analytics 时长测试 | `test_voice_sessions.py` 覆盖新增字段 | +| 14 | P0 | 
Tests | 增加 Provider 分布测试 | 测试覆盖 fallback/openai 分布 | +| 15 | P0 | Tests | 增加确认率测试 | 测试覆盖 `confirmation_request_rate` | +| 16 | P1 | Analytics | 统计文本 fallback turn 数 | analytics 有 `text_fallback_turns` | +| 17 | P1 | Analytics | 统计上传音频 turn 数 | analytics 有 `uploaded_audio_turns` | +| 18 | P1 | Analytics | 统计用户语音 turn 占比 | analytics 有 `user_audio_turn_rate` | +| 19 | P1 | Analytics | 统计助手音频 ready turn 数 | analytics 有 `assistant_audio_ready_turns` | +| 20 | P1 | Analytics | 统计助手音频 ready 率 | analytics 有 `assistant_audio_ready_rate` | +| 21 | P1 | Analytics | 统计 ASR 成功率 | analytics 有 `asr_success_rate` | +| 22 | P1 | Analytics | 统计 TTS 成功率 | analytics 有 `tts_success_rate` | +| 23 | P1 | Analytics | 统计平均转写置信度 | analytics 有 `avg_transcript_confidence` | +| 24 | P1 | Analytics | 统计平均意图置信度 | analytics 有 `avg_intent_confidence` | +| 25 | P1 | Analytics | 统计安全介入率 | analytics 有 `safety_intervention_rate` | +| 26 | P1 | Analytics | 统计语音失败事件分布 | analytics 有 `failure_event_counts` | +| 27 | P1 | Frontend | Voice Studio 展示 fallback/upload turn 数 | 观测卡片可见输入构成 | +| 28 | P1 | Frontend | Voice Studio 展示助手音频 ready 率 | 观测卡片可见 TTS 产物覆盖 | +| 29 | P1 | Frontend | Voice Studio 展示 ASR/TTS 成功率 | 观测卡片文案可见成功率 | +| 30 | P1 | Frontend | Voice Studio 展示平均置信度 | 观测卡片文案可见转写/意图均值 | +| 31 | P1 | Frontend | Turn 卡片展示用户录音时长 | 单轮卡片可解释录音长度 | +| 32 | P1 | Frontend | Turn 卡片展示助手音频时长 | 单轮卡片可解释 TTS 产物长度 | +| 33 | P1 | Smoke | `SMOKE_VOICE=1` 断言输入构成 | smoke 检查 fallback/upload 计数 | +| 34 | P1 | Smoke | `SMOKE_VOICE=1` 断言成功率字段 | smoke 检查 ASR/TTS/assistant audio 率 | +| 35 | P1 | Tests | 增加输入构成测试 | 后端测试覆盖 fallback/upload 计数 | +| 36 | P1 | Tests | 增加音频 ready 率测试 | 后端测试覆盖 assistant audio ready | +| 37 | P1 | Tests | 增加平均置信度测试 | 后端测试覆盖 confidence 均值 | +| 38 | P1 | Docs | 更新技术方案 analytics 字段 | tech spec 与接口一致 | +| 39 | P1 | Docs | 更新 demo checklist 观测项 | checklist 包含语音观测字段 | +| 40 | P1 | Docs | 更新 validation log | 日志记录命令与结果 | +| 41 | P2 | Product | 真实儿童表达样本集 | 至少 10 条样本进入验收文档 | +| 42 | P2 | Product | 低置信度文案 A/B 草案 | 
输出两版确认文案 | +| 43 | P2 | Frontend | 移动端确认卡密度优化 | 小屏按钮不拥挤 | +| 44 | P2 | Frontend | 会话列表显示观测摘要 | 列表可见需处理原因和输入模式 | +| 45 | P2 | Backend | 支持 analytics 按 provider 过滤 | query 可筛选 provider | +| 46 | P2 | Backend | 支持 analytics 按 status 过滤 | query 可筛选会话状态 | +| 47 | P2 | Ops | ASR Provider 管理端摘要 | admin 侧可见 ASR 调用情况 | +| 48 | P2 | QA | Docker voice smoke 回归 | Docker 栈下 `SMOKE_VOICE=1` 通过 | +| 49 | P2 | Review | 自审语音服务复杂度 | 列出可拆分函数和风险点 | +| 50 | P2 | Review | 自审演示口径一致性 | PRD、tech spec、checklist 口径一致 | + +### 今日执行策略 + +- 先完成 #01-#40 中无需数据库迁移的观测与验收项。 +- #41-#50 作为后续产品化和演示质量任务,不阻塞今天的 Alpha 收束。 +- 每批完成后必须跑后端语音测试、前端 build、ruff,并追加验证日志。 + ## Success Metrics ### Product Metrics @@ -600,3 +665,38 @@ DreamWeaver 的语音共创模式应当成为一种“孩子可以开口参与 4. 复用现有生成主干,新增 voice session 层,而不是另起一套平行系统 这样既能保持当前 PRD 主线不被打断,也能确保后续做语音共创时,我们是在按计划推进,而不是临时起意。 + +## Phase A Alpha Child Expression Samples(P2 Seed) + +这些样本用于后续补齐真实儿童表达验收,不作为模型提示词硬编码。 + +| # | Sample | Expected Intent | Review Focus | +| --- | --- | --- | --- | +| 01 | 我想听小熊和星星找家的故事 | start_story | 能否抓住主角与目标 | +| 02 | 不要让小熊害怕,让月亮姐姐帮它 | correct_story | 修正是否接上上一轮 | +| 03 | 然后小狐狸也来了,它带了饼干 | continue_story | 新角色是否自然进入 | +| 04 | 我不喜欢黑黑的森林,换成彩虹森林 | correct_story | 负面场景是否温和替换 | +| 05 | 让恐龙变小一点,不要踩坏花 | correct_story | 安全和教育主题是否保留 | +| 06 | 再讲一段,它们坐上云朵船 | continue_story | 奇幻想象是否延续 | +| 07 | 结束吧,我想保存这个故事 | save_story | 是否引导 finalize | +| 08 | 先停一下,我等会再讲 | end_story | 是否保持会话可恢复 | +| 09 | 它们可以一起道歉吗 | continue_story | 是否融入教育主题 | +| 10 | 我刚才说错了,不是兔子,是小猫 | correct_story | 指代修正是否准确 | + +## Phase A Alpha Confirmation Copy Options(P2 Seed) + +- 版本 A(更温柔):`我刚才听到的是「{summary}」。如果听对了,我们就按这个继续;如果不对,可以重说一遍或改成文字。` +- 版本 B(更高效):`本轮系统理解为「{summary}」。请家长确认:继续、重说,或改成文本输入。` + +默认建议继续使用版本 B,因为 Alpha 演示时更短、更容易解释系统状态。 + +## Phase A Alpha Execution Update(2026-04-25) + +本轮继续推进真实开发任务,而不是只维护任务池: + +- 已完成 #45:voice analytics 支持 `provider` 查询参数,可按转写来源筛选 turn、事件和会话集合。 +- 已完成 #46:voice analytics 支持 `session_status` 查询参数,可按会话状态筛选统计窗口。 +- 已扩展 Voice Studio 观测卡:支持转写来源和会话状态筛选,便于演示时解释 
demo/fallback/真实 ASR 差异。 +- 已扩展 `SMOKE_VOICE=1`:增加 provider/status 过滤断言,避免 analytics 只验证全量路径。 + +后续仍未完成:#47 ASR Provider 管理端摘要、#49 服务复杂度拆分、#50 演示口径最终复核;#48 Docker voice smoke 回归已通过,命令与结果见 `docs/planning/demo-validation-log.md` 的「2026-04-25 Docker Voice Smoke 回归闭环」。 diff --git a/docs/technical/voice-co-creation-phase-a-tech-spec.md b/docs/technical/voice-co-creation-phase-a-tech-spec.md index 6468171..280aeb9 100644 --- a/docs/technical/voice-co-creation-phase-a-tech-spec.md +++ b/docs/technical/voice-co-creation-phase-a-tech-spec.md @@ -28,7 +28,7 @@ - 低置信度确认链路已有后端测试覆盖,可作为下一阶段继续接 ASR 与更细确认交互的基础 - 已新增用户转写安全检查、assistant 输出柔性改写与 `safety_flags` 事件记录 - finalize 会生成更稳定的标题/摘要,并在条件允许时自动排队封面补全 job -- 已新增 `voice session analytics` 聚合指标,可跟踪 turn 成功率、ASR/TTS 失败、低置信度触发和 finalize 转化率 +- 已新增 `voice session analytics` 聚合指标,可跟踪 turn 成功率、ASR/TTS 失败、低置信度触发、finalize 转化率、输入构成、语音时长、Provider 分布、确认率和平均置信度,并支持按转写 Provider 与会话状态筛选 - `voice session finalize` 现在会返回可追踪的 `generation_job_id`,让正式 Story 资产补全重新接回现有 generation trace 主干 - 语音共创触发的 `asset_generation` job 现在也支持沿用统一 generation job 的取消 / 重试控制 diff --git a/frontend/src/types/voiceSession.ts b/frontend/src/types/voiceSession.ts index 2288116..da0d3bc 100644 --- a/frontend/src/types/voiceSession.ts +++ b/frontend/src/types/voiceSession.ts @@ -6,6 +6,7 @@ export interface VoiceTurnSummary { user_transcript: string | null transcript_confidence: number | null transcription_provider: string | null + user_audio_duration_ms: number | null detected_intent: string intent_confidence: number | null understanding_summary: string | null @@ -17,6 +18,7 @@ export interface VoiceTurnSummary { safety_blocked: boolean safety_message: string | null assistant_text: string | null + assistant_audio_duration_ms: number | null assistant_audio_ready: boolean assistant_audio_url: string | null user_audio_ready: boolean @@ -81,6 +83,8 @@ export interface VoiceTurnAcceptedResponse { export interface VoiceSessionAnalytics { window_days: number | null + provider: string | null + session_status: string | null total_sessions: number attention_sessions: 
number confirmation_attention_sessions: number @@ -96,6 +100,24 @@ export interface VoiceSessionAnalytics { tts_failures: number low_confidence_turns: number safety_interventions: number + text_fallback_turns: number + uploaded_audio_turns: number + user_audio_turn_rate: number + assistant_audio_ready_turns: number + assistant_audio_ready_rate: number + asr_success_rate: number + tts_success_rate: number + avg_transcript_confidence: number + avg_intent_confidence: number + safety_intervention_rate: number + failure_event_counts: Record<string, number> + total_user_audio_duration_ms: number + avg_user_audio_duration_ms: number + total_assistant_audio_turns: number + total_assistant_audio_duration_ms: number + avg_assistant_audio_duration_ms: number + transcription_provider_counts: Record<string, number> + confirmation_request_rate: number turn_success_rate: number finalize_conversion_rate: number } diff --git a/frontend/src/views/VoiceStudio.vue b/frontend/src/views/VoiceStudio.vue index cfba5fd..73e0478 100644 --- a/frontend/src/views/VoiceStudio.vue +++ b/frontend/src/views/VoiceStudio.vue @@ -80,6 +80,8 @@ const selectedUniverseId = ref('') const sessionFilter = ref('active') const attentionReasonFilter = ref('all') const analyticsWindow = ref<'7' | '30' | 'all'>('30') +const analyticsProviderFilter = ref('') +const analyticsStatusFilter = ref('') const textTurnInput = ref('') const uploadTranscriptHint = ref('') const loadingSessions = ref(false) @@ -113,10 +115,30 @@ const profileOptions = computed(() => const universeOptions = computed(() => universes.value.map((universe) => ({ value: universe.id, label: universe.name })), ) +const analyticsProviderOptions = [ + { value: 'fallback', label: '文本 fallback' }, + { value: 'demo', label: 'Demo ASR' }, + { value: 'openai', label: 'OpenAI ASR' }, + { value: 'openai_asr', label: 'OpenAI ASR Adapter' }, +] +const analyticsStatusOptions = [ + { value: 'draft', label: '草稿' }, + { value: 'active', label: '进行中' }, + { value: 'waiting_user', label: '等待用户' 
}, + { value: 'completed', label: '已完成' }, + { value: 'abandoned', label: '已放弃' }, +] const filteredSessions = computed(() => { return resolveDisplayedSessions(sessions.value) }) +const getSessionInputModeSummary = (session: VoiceSessionSummary) => { + if (session.latest_requires_confirmation) return '上一轮待确认' + if (session.latest_safety_flags.length) return `安全介入 ${session.latest_safety_flags.length} 项` + if (session.latest_detected_intent) return `最近意图:${formatIntent(session.latest_detected_intent)}` + return '等待输入' +} + const activeTurnList = computed(() => activeSession.value?.recent_turns ?? []) const hasPendingConfirmation = computed(() => activeSession.value?.latest_requires_confirmation ?? false) const latestPendingConfirmationTurn = computed( @@ -269,6 +291,50 @@ const finalizeConversionRateLabel = computed(() => { if (!voiceAnalytics.value) return '0%' return `${Math.round(voiceAnalytics.value.finalize_conversion_rate * 100)}%` }) +const confirmationRequestRateLabel = computed(() => { + if (!voiceAnalytics.value) return '0%' + return `${Math.round(voiceAnalytics.value.confirmation_request_rate * 100)}%` +}) +const userAudioTurnRateLabel = computed(() => { + if (!voiceAnalytics.value) return '0%' + return `${Math.round(voiceAnalytics.value.user_audio_turn_rate * 100)}%` +}) +const assistantAudioReadyRateLabel = computed(() => { + if (!voiceAnalytics.value) return '0%' + return `${Math.round(voiceAnalytics.value.assistant_audio_ready_rate * 100)}%` +}) +const asrSuccessRateLabel = computed(() => { + if (!voiceAnalytics.value) return '0%' + return `${Math.round(voiceAnalytics.value.asr_success_rate * 100)}%` +}) +const ttsSuccessRateLabel = computed(() => { + if (!voiceAnalytics.value) return '0%' + return `${Math.round(voiceAnalytics.value.tts_success_rate * 100)}%` +}) +const avgConfidenceSummary = computed(() => { + if (!voiceAnalytics.value) return '转写 0%,意图 0%' + const transcript = Math.round(voiceAnalytics.value.avg_transcript_confidence * 100) + const 
intent = Math.round(voiceAnalytics.value.avg_intent_confidence * 100) + return `转写 ${transcript}%,意图 ${intent}%` +}) +const avgUserAudioDurationLabel = computed(() => { + if (!voiceAnalytics.value || !voiceAnalytics.value.avg_user_audio_duration_ms) return '0.0 秒' + return `${(voiceAnalytics.value.avg_user_audio_duration_ms / 1000).toFixed(1)} 秒` +}) +const avgAssistantAudioDurationLabel = computed(() => { + if (!voiceAnalytics.value || !voiceAnalytics.value.avg_assistant_audio_duration_ms) return '0.0 秒' + return `${(voiceAnalytics.value.avg_assistant_audio_duration_ms / 1000).toFixed(1)} 秒` +}) +const formatDurationMs = (durationMs: number | null | undefined) => { + if (!durationMs) return '0.0 秒' + return `${(durationMs / 1000).toFixed(1)} 秒` +} +const transcriptionProviderSummary = computed(() => { + const counts = voiceAnalytics.value?.transcription_provider_counts ?? {} + const entries = Object.entries(counts).sort((left, right) => right[1] - left[1]) + if (!entries.length) return '暂无转写来源' + return entries.map(([provider, count]) => `${provider} ${count} 次`).join(',') +}) const analyticsWindowLabel = computed(() => formatAnalyticsWindowLabel(voiceAnalytics.value?.window_days ?? null), ) @@ -616,10 +682,19 @@ async function syncVoiceStudioRouteState(options?: { } function buildVoiceAnalyticsPath() { - if (analyticsWindow.value === 'all') { - return '/api/voice-sessions/analytics' + const params = new URLSearchParams() + if (analyticsWindow.value !== 'all') { + params.set('days', analyticsWindow.value) } - return `/api/voice-sessions/analytics?days=${analyticsWindow.value}` + if (analyticsProviderFilter.value) { + params.set('provider', analyticsProviderFilter.value) + } + if (analyticsStatusFilter.value) { + params.set('session_status', analyticsStatusFilter.value) + } + const query = params.toString() + const path = '/api/voice-sessions/analytics' + return query ? 
`${path}?${query}` : path } function buildVoiceSessionListPath() { @@ -1157,6 +1232,14 @@ function setAnalyticsWindow(value: '7' | '30' | 'all') { analyticsWindow.value = value } +function setAnalyticsProviderFilter(value: string | number) { + analyticsProviderFilter.value = String(value) +} + +function setAnalyticsStatusFilter(value: string | number) { + analyticsStatusFilter.value = String(value) +} + function setSessionFilter(value: SessionFilter) { suppressAutoAdvanceNotice.value = true clearAttentionCompletionNotice() @@ -1258,6 +1341,10 @@ watch(analyticsWindow, () => { void loadVoiceAnalytics() }) +watch([analyticsProviderFilter, analyticsStatusFilter], () => { + void loadVoiceAnalytics() +}) + watch(sessionFilter, () => { void loadSessions() }) @@ -1528,6 +1615,9 @@ onBeforeUnmount(() => {
{{ formatSessionStatus(session.status) }} · {{ session.total_turns }} 轮
+
+ {{ getSessionInputModeSummary(session) }} +
{ 全部
+
+ + +
Turn 成功率
@@ -1677,6 +1783,7 @@ onBeforeUnmount(() => {
低置信度触发
{{ voiceAnalytics.low_confidence_turns }}
+
确认率 {{ confirmationRequestRateLabel }}
安全介入
@@ -1686,12 +1793,36 @@ onBeforeUnmount(() => {
Finalize 转化率
{{ finalizeConversionRateLabel }}
+
+
上传语音占比
+
{{ userAudioTurnRateLabel }}
+
上传 {{ voiceAnalytics.uploaded_audio_turns }} / 文本 {{ voiceAnalytics.text_fallback_turns }}
+
+
+
助手语音覆盖
+
{{ assistantAudioReadyRateLabel }}
+
{{ voiceAnalytics.assistant_audio_ready_turns }} 轮有语音
+
+
+
ASR 成功率
+
{{ asrSuccessRateLabel }}
+
+
+
TTS 成功率
+
{{ ttsSuccessRateLabel }}
+

ASR 失败 {{ voiceAnalytics.asr_failures }} 次,TTS 失败 {{ voiceAnalytics.tts_failures }} 次; 当前共有 {{ voiceAnalytics.total_sessions }} 个会话,其中 {{ voiceAnalytics.attention_sessions }} 个仍需处理, 已完成 {{ voiceAnalytics.finalized_sessions }} 个。

+

+ 平均用户语音 {{ avgUserAudioDurationLabel }},平均助手语音 {{ avgAssistantAudioDurationLabel }},转写来源:{{ transcriptionProviderSummary }}。 +

+

+ 平均置信度:{{ avgConfidenceSummary }};安全介入率 {{ Math.round(voiceAnalytics.safety_intervention_rate * 100) }}%。 +

{ · {{ formatIntent(turn.detected_intent) }} · {{ turn.transcription_provider }} + · 用户语音 {{ formatDurationMs(turn.user_audio_duration_ms) }} + · 助手语音 {{ formatDurationMs(turn.assistant_audio_duration_ms) }}

孩子: diff --git a/scripts/demo_smoke.sh b/scripts/demo_smoke.sh index 20a1a19..6b3cd40 100755 --- a/scripts/demo_smoke.sh +++ b/scripts/demo_smoke.sh @@ -9,8 +9,9 @@ SMOKE_AUDIO="${SMOKE_AUDIO:-0}" SMOKE_VOICE="${SMOKE_VOICE:-0}" COOKIE_JAR="$(mktemp "${TMPDIR:-/tmp}/dreamweaver-cookie.XXXXXX")" +VOICE_SMOKE_AUDIO="$(mktemp "${TMPDIR:-/tmp}/dreamweaver-voice-audio.XXXXXX")" cleanup() { - rm -f "$COOKIE_JAR" + rm -f "$COOKIE_JAR" "$VOICE_SMOKE_AUDIO" } trap cleanup EXIT @@ -174,12 +175,13 @@ if [[ "$SMOKE_VOICE" == "1" ]]; then voice_turn_detail_json="$(get_json "$APP_URL/api/voice-sessions/$voice_session_id/turns/$voice_turn_id")" assert_jq "$voice_turn_detail_json" '.user_transcript | contains("小熊")' "voice fallback turn should keep user transcript" assert_jq "$voice_turn_detail_json" '.assistant_text != null and .assistant_text != ""' "voice fallback turn should return assistant text" - assert_jq "$voice_turn_detail_json" '.detected_intent == "start" and .requires_confirmation == false' "first voice turn should start the story without confirmation" + assert_jq "$voice_turn_detail_json" '.detected_intent == "start_story" and .requires_confirmation == false' "first voice turn should start the story without confirmation" echo "$voice_turn_detail_json" | jq '{id,status,detected_intent,requires_confirmation,assistant_audio_ready,assistant_text}' say "Submitting voice uploaded turn with demo transcript hint" + printf 'dreamweaver-demo-audio' > "$VOICE_SMOKE_AUDIO" voice_upload_json="$(post_form "$APP_URL/api/voice-sessions/$voice_session_id/turns" \ - -F 'audio_file=@/dev/null;filename=turn.webm;type=audio/webm' \ + -F "audio_file=@${VOICE_SMOKE_AUDIO};filename=turn.webm;type=audio/webm" \ -F 'duration_ms=900' \ -F 'transcript_hint=不要让小熊害怕,让月亮姐姐帮它')" voice_upload_turn_id="$(jq -r '.turn_id' <<<"$voice_upload_json")" @@ -187,18 +189,27 @@ if [[ "$SMOKE_VOICE" == "1" ]]; then voice_upload_detail_json="$(get_json 
"$APP_URL/api/voice-sessions/$voice_session_id/turns/$voice_upload_turn_id")" assert_jq "$voice_upload_detail_json" '.user_transcript | contains("月亮姐姐")' "voice upload turn should expose hinted transcript" - assert_jq "$voice_upload_detail_json" '.detected_intent == "correct" and .assistant_text != null' "voice upload correction should continue the narrative" - echo "$voice_upload_detail_json" | jq '{id,status,transcription_provider,detected_intent,requires_confirmation,assistant_audio_ready,assistant_text}' + assert_jq "$voice_upload_detail_json" '.detected_intent == "correct_story" and .assistant_text != null' "voice upload correction should continue the narrative" + assert_jq "$voice_upload_detail_json" '.user_audio_duration_ms == 900' "voice upload turn should expose user audio duration" + echo "$voice_upload_detail_json" | jq '{id,status,transcription_provider,user_audio_duration_ms,detected_intent,requires_confirmation,assistant_audio_ready,assistant_text}' say "Checking voice session detail and analytics" voice_detail_json="$(get_json "$APP_URL/api/voice-sessions/$voice_session_id")" assert_jq "$voice_detail_json" '.current_turn_index >= 2 and (.recent_turns | length) >= 2 and (.events | length) >= 2 and .can_finalize == true' "voice session should include turns/events and be finalizable" - assert_jq "$voice_detail_json" '([.events[].event_type] | index("turn_transcribed")) != null and ([.events[].event_type] | index("turn_narrative_ready")) != null' "voice session should record key turn events" + assert_jq "$voice_detail_json" '([.events[].event_type] | index("turn_transcribed")) != null and ([.events[].event_type] | index("assistant_text_ready")) != null' "voice session should record key turn events" echo "$voice_detail_json" | jq '{id,status,current_turn_index,can_finalize,latest_detected_intent,events:([.events[].event_type] | unique)}' voice_analytics_json="$(get_json "$APP_URL/api/voice-sessions/analytics?days=7")" assert_jq "$voice_analytics_json" 
'.window_days == 7 and .total_sessions >= 1 and .total_turns >= 2 and .successful_turns >= 2' "voice analytics should include the smoke session" - echo "$voice_analytics_json" | jq '{window_days,total_sessions,total_turns,successful_turns,failed_turns,turn_success_rate,finalize_conversion_rate}' + assert_jq "$voice_analytics_json" '.total_user_audio_duration_ms >= 2100 and .avg_user_audio_duration_ms > 0 and .transcription_provider_counts.demo >= 1 and .transcription_provider_counts.fallback >= 1' "voice analytics should expose duration and provider distribution" + assert_jq "$voice_analytics_json" '.text_fallback_turns >= 1 and .uploaded_audio_turns >= 1 and .user_audio_turn_rate > 0 and .assistant_audio_ready_rate > 0 and .asr_success_rate > 0 and .tts_success_rate > 0' "voice analytics should expose turn mix and success rates" + echo "$voice_analytics_json" | jq '{window_days,total_sessions,total_turns,successful_turns,failed_turns,text_fallback_turns,uploaded_audio_turns,user_audio_turn_rate,assistant_audio_ready_rate,asr_success_rate,tts_success_rate,total_user_audio_duration_ms,avg_user_audio_duration_ms,transcription_provider_counts,confirmation_request_rate,turn_success_rate,finalize_conversion_rate}' + + voice_demo_analytics_json="$(get_json "$APP_URL/api/voice-sessions/analytics?days=7&provider=demo")" + assert_jq "$voice_demo_analytics_json" '.provider == "demo" and .uploaded_audio_turns >= 1 and (.transcription_provider_counts | keys == ["demo"])' "voice analytics should filter by ASR provider" + + voice_waiting_analytics_json="$(get_json "$APP_URL/api/voice-sessions/analytics?days=7&session_status=waiting_user")" + assert_jq "$voice_waiting_analytics_json" '.session_status == "waiting_user" and .total_sessions >= 1' "voice analytics should filter by session status" say "Finalizing voice session into story" voice_finalize_json="$(post_json "$APP_URL/api/voice-sessions/$voice_session_id/finalize" '{