Add voice analytics filters and metrics

This commit is contained in:
2026-04-26 22:00:34 +08:00
parent 3805c18622
commit 55ca0985eb
25 changed files with 710 additions and 39 deletions

View File

@@ -1,3 +1,5 @@
from typing import Literal
from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, ConfigDict, Field from pydantic import BaseModel, ConfigDict, Field
from sqlalchemy import select from sqlalchemy import select
@@ -120,7 +122,9 @@ async def list_provider_capabilities():
@router.get("/providers/analytics", response_model=ProviderAnalyticsResponse) @router.get("/providers/analytics", response_model=ProviderAnalyticsResponse)
async def get_provider_analytics( async def get_provider_analytics(
days: int | None = Query(default=None, ge=1, le=365), days: int | None = Query(default=None, ge=1, le=365),
capability: str | None = Query(default=None), capability: Literal["text", "image", "tts", "storybook", "asr"] | None = Query(
default=None
),
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
"""获取当前环境跨用户的 Provider 运营摘要。""" """获取当前环境跨用户的 Provider 运营摘要。"""

View File

@@ -116,11 +116,21 @@ async def get_latest_active_voice_session(
@router.get("/voice-sessions/analytics", response_model=VoiceSessionAnalyticsResponse) @router.get("/voice-sessions/analytics", response_model=VoiceSessionAnalyticsResponse)
async def get_voice_session_analytics( async def get_voice_session_analytics(
days: int | None = Query(default=30, ge=1, le=365), days: int | None = Query(default=30, ge=1, le=365),
provider: str | None = Query(default=None, min_length=1, max_length=64),
session_status: (
Literal["draft", "active", "waiting_user", "completed", "abandoned"] | None
) = Query(default=None),
user: User = Depends(require_user), user: User = Depends(require_user),
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
"""Get aggregate voice co-creation analytics for the current user.""" """Get aggregate voice co-creation analytics for the current user."""
return await get_voice_session_analytics_service(user.id, db, days=days) return await get_voice_session_analytics_service(
user.id,
db,
days=days,
provider=provider,
session_status=session_status,
)
@router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse) @router.get("/voice-sessions/{session_id}", response_model=VoiceSessionDetailResponse)

View File

@@ -34,6 +34,14 @@ else:
) )
celery_app.conf.update( celery_app.conf.update(
imports=(
"app.tasks.achievements",
"app.tasks.audio_cache",
"app.tasks.generation_maintenance",
"app.tasks.generation_workflow",
"app.tasks.memory",
"app.tasks.push_notifications",
),
task_track_started=True, task_track_started=True,
task_serializer="json", task_serializer="json",
accept_content=["json"], accept_content=["json"],

View File

@@ -73,7 +73,10 @@ class Settings(BaseSettings):
) )
voice_transcription_mode: str = Field( voice_transcription_mode: str = Field(
"provider", "provider",
description="Voice transcription mode: provider or disabled; provider order is controlled by ASR_PROVIDERS", description=(
"Voice transcription mode: provider or disabled; provider order is "
"controlled by ASR_PROVIDERS"
),
) )
voice_transcription_model: str = Field( voice_transcription_model: str = Field(
"gpt-4o-mini-transcribe", "gpt-4o-mini-transcribe",

View File

@@ -1,4 +1,4 @@
from datetime import datetime from datetime import datetime, timezone
from decimal import Decimal from decimal import Decimal
from uuid import uuid4 from uuid import uuid4
@@ -12,6 +12,10 @@ def _uuid() -> str:
return str(uuid4()) return str(uuid4())
def _utcnow() -> datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``.

    Used as the ``default`` / ``onupdate`` callable for the
    ``DateTime(timezone=True)`` columns in this module, replacing the naive
    ``datetime.utcnow`` callable they used previously.
    """
    return datetime.now(tz=timezone.utc)
class Provider(Base): class Provider(Base):
"""Model provider registry.""" """Model provider registry."""
@@ -34,9 +38,9 @@ class Provider(Base):
nullable=True, nullable=True,
) # 存储额外配置(speed, vol, etc) ) # 存储额外配置(speed, vol, etc)
config_ref: Mapped[str] = mapped_column(String(100), nullable=True) # 环境变量 key 名称(回退) config_ref: Mapped[str] = mapped_column(String(100), nullable=True) # 环境变量 key 名称(回退)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=datetime.utcnow) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column( updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
) )
updated_by: Mapped[str] = mapped_column(String(100), nullable=True) updated_by: Mapped[str] = mapped_column(String(100), nullable=True)
@@ -51,7 +55,7 @@ class ProviderMetrics(Base):
String(36), ForeignKey("providers.id", ondelete="CASCADE"), nullable=False, index=True String(36), ForeignKey("providers.id", ondelete="CASCADE"), nullable=False, index=True
) )
timestamp: Mapped[datetime] = mapped_column( timestamp: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.utcnow, index=True DateTime(timezone=True), default=_utcnow, index=True
) )
success: Mapped[bool] = mapped_column(Boolean, nullable=False) success: Mapped[bool] = mapped_column(Boolean, nullable=False)
latency_ms: Mapped[int] = mapped_column(Integer, nullable=True) latency_ms: Mapped[int] = mapped_column(Integer, nullable=True)
@@ -82,9 +86,9 @@ class ProviderSecret(Base):
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=_uuid) id: Mapped[str] = mapped_column(String(36), primary_key=True, default=_uuid)
name: Mapped[str] = mapped_column(String(100), unique=True, nullable=False) name: Mapped[str] = mapped_column(String(100), unique=True, nullable=False)
encrypted_value: Mapped[str] = mapped_column(Text, nullable=False) encrypted_value: Mapped[str] = mapped_column(Text, nullable=False)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=datetime.utcnow) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column( updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
) )
@@ -97,10 +101,10 @@ class CostRecord(Base):
user_id: Mapped[str] = mapped_column(String(36), nullable=False, index=True) user_id: Mapped[str] = mapped_column(String(36), nullable=False, index=True)
provider_id: Mapped[str] = mapped_column(String(36), nullable=True) # 可能是环境变量配置 provider_id: Mapped[str] = mapped_column(String(36), nullable=True) # 可能是环境变量配置
provider_name: Mapped[str] = mapped_column(String(100), nullable=False) provider_name: Mapped[str] = mapped_column(String(100), nullable=False)
capability: Mapped[str] = mapped_column(String(50), nullable=False) # text/image/tts/storybook/asr capability: Mapped[str] = mapped_column(String(50), nullable=False)
estimated_cost: Mapped[Decimal] = mapped_column(Numeric(10, 6), nullable=False) estimated_cost: Mapped[Decimal] = mapped_column(Numeric(10, 6), nullable=False)
timestamp: Mapped[datetime] = mapped_column( timestamp: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.utcnow, index=True DateTime(timezone=True), default=_utcnow, index=True
) )
@@ -116,7 +120,7 @@ class UserBudget(Base):
Numeric(3, 2), default=Decimal("0.8") Numeric(3, 2), default=Decimal("0.8")
) # 80% 时告警 ) # 80% 时告警
enabled: Mapped[bool] = mapped_column(Boolean, default=True) enabled: Mapped[bool] = mapped_column(Boolean, default=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=datetime.utcnow) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow)
updated_at: Mapped[datetime] = mapped_column( updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow DateTime(timezone=True), default=_utcnow, onupdate=_utcnow
) )

View File

@@ -6,7 +6,7 @@ from app.core.config import settings
_engine = None _engine = None
_session_factory: async_sessionmaker[AsyncSession] | None = None _session_factory: async_sessionmaker[AsyncSession] | None = None
_lock = threading.Lock() _lock = threading.RLock()
def _get_engine(): def _get_engine():
@@ -34,6 +34,25 @@ def _get_session_factory():
return _session_factory return _session_factory
async def dispose_engine():
    """Dispose the async engine and reset cached DB handles.

    Celery tasks run async code through ``asyncio.run()``, which creates and closes
    one event loop per task. Asyncpg connections are bound to the loop that created
    them, so worker tasks must not keep pooled connections across task runs.

    The cached handles are cleared even when ``engine.dispose()`` raises, so a
    failed teardown cannot leave a stale engine cached for the next caller. The
    exception from ``dispose()`` still propagates.
    """
    global _engine, _session_factory

    engine = _engine
    try:
        if engine is not None:
            await engine.dispose()
    finally:
        # Reset only if nobody swapped in a different engine while dispose()
        # was awaited; otherwise a freshly created engine would be discarded.
        with _lock:
            if _engine is engine:
                _engine = None
                _session_factory = None
async def init_db(): async def init_db():
"""Create tables if they do not exist.""" """Create tables if they do not exist."""
from app.db.models import Base # main models from app.db.models import Base # main models

View File

@@ -77,6 +77,7 @@ class VoiceTurnSummaryResponse(BaseModel):
user_transcript: str | None = None user_transcript: str | None = None
transcript_confidence: float | None = None transcript_confidence: float | None = None
transcription_provider: str | None = None transcription_provider: str | None = None
user_audio_duration_ms: int | None = None
detected_intent: str detected_intent: str
intent_confidence: float | None = None intent_confidence: float | None = None
understanding_summary: str | None = None understanding_summary: str | None = None
@@ -88,6 +89,7 @@ class VoiceTurnSummaryResponse(BaseModel):
safety_blocked: bool = False safety_blocked: bool = False
safety_message: str | None = None safety_message: str | None = None
assistant_text: str | None = None assistant_text: str | None = None
assistant_audio_duration_ms: int | None = None
assistant_audio_ready: bool = False assistant_audio_ready: bool = False
assistant_audio_url: str | None = None assistant_audio_url: str | None = None
user_audio_ready: bool = False user_audio_ready: bool = False
@@ -149,6 +151,8 @@ class VoiceSessionAnalyticsResponse(BaseModel):
"""Aggregated voice co-creation analytics for one user.""" """Aggregated voice co-creation analytics for one user."""
window_days: int | None = None window_days: int | None = None
provider: str | None = None
session_status: str | None = None
total_sessions: int = 0 total_sessions: int = 0
attention_sessions: int = 0 attention_sessions: int = 0
confirmation_attention_sessions: int = 0 confirmation_attention_sessions: int = 0
@@ -164,6 +168,24 @@ class VoiceSessionAnalyticsResponse(BaseModel):
tts_failures: int = 0 tts_failures: int = 0
low_confidence_turns: int = 0 low_confidence_turns: int = 0
safety_interventions: int = 0 safety_interventions: int = 0
text_fallback_turns: int = 0
uploaded_audio_turns: int = 0
user_audio_turn_rate: float = 0.0
assistant_audio_ready_turns: int = 0
assistant_audio_ready_rate: float = 0.0
asr_success_rate: float = 0.0
tts_success_rate: float = 0.0
avg_transcript_confidence: float = 0.0
avg_intent_confidence: float = 0.0
safety_intervention_rate: float = 0.0
failure_event_counts: dict[str, int] = Field(default_factory=dict)
total_user_audio_duration_ms: int = 0
avg_user_audio_duration_ms: float = 0.0
total_assistant_audio_turns: int = 0
total_assistant_audio_duration_ms: int = 0
avg_assistant_audio_duration_ms: float = 0.0
transcription_provider_counts: dict[str, int] = Field(default_factory=dict)
confirmation_request_rate: float = 0.0
turn_success_rate: float = 0.0 turn_success_rate: float = 0.0
finalize_conversion_rate: float = 0.0 finalize_conversion_rate: float = 0.0

View File

@@ -2,11 +2,11 @@
# Demo adapters # Demo adapters
from app.services.adapters import demo as _demo_adapters # noqa: F401 from app.services.adapters import demo as _demo_adapters # noqa: F401
from app.services.adapters.base import AdapterConfig, BaseAdapter
# ASR adapters # ASR adapters
from app.services.adapters.asr import demo as _asr_demo_adapter # noqa: F401 from app.services.adapters.asr import demo as _asr_demo_adapter # noqa: F401
from app.services.adapters.asr import openai as _asr_openai_adapter # noqa: F401 from app.services.adapters.asr import openai as _asr_openai_adapter # noqa: F401
from app.services.adapters.base import AdapterConfig, BaseAdapter
# Image adapters # Image adapters
from app.services.adapters.image import cqtai as _image_cqtai_adapter # noqa: F401 from app.services.adapters.image import cqtai as _image_cqtai_adapter # noqa: F401

View File

@@ -335,6 +335,7 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
user_transcript=turn.user_transcript, user_transcript=turn.user_transcript,
transcript_confidence=turn.transcript_confidence, transcript_confidence=turn.transcript_confidence,
transcription_provider=turn_patch.get("transcription_provider"), transcription_provider=turn_patch.get("transcription_provider"),
user_audio_duration_ms=turn.user_audio_duration_ms,
detected_intent=turn.detected_intent, detected_intent=turn.detected_intent,
intent_confidence=turn.intent_confidence, intent_confidence=turn.intent_confidence,
understanding_summary=confirmation_state["understanding_summary"], understanding_summary=confirmation_state["understanding_summary"],
@@ -346,6 +347,7 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
safety_blocked=safety_state["safety_blocked"], safety_blocked=safety_state["safety_blocked"],
safety_message=safety_state["safety_message"], safety_message=safety_state["safety_message"],
assistant_text=turn.assistant_text, assistant_text=turn.assistant_text,
assistant_audio_duration_ms=turn.assistant_audio_duration_ms,
assistant_audio_ready=session_audio_exists(turn.assistant_audio_path), assistant_audio_ready=session_audio_exists(turn.assistant_audio_path),
assistant_audio_url=_assistant_audio_url( assistant_audio_url=_assistant_audio_url(
turn.session_id, turn.session_id,
@@ -1194,10 +1196,14 @@ async def get_voice_session_analytics_service(
db: AsyncSession, db: AsyncSession,
*, *,
days: int | None = 30, days: int | None = 30,
provider: str | None = None,
session_status: str | None = None,
) -> VoiceSessionAnalyticsResponse: ) -> VoiceSessionAnalyticsResponse:
cutoff = None cutoff = None
if days is not None: if days is not None:
cutoff = datetime.now(timezone.utc) - timedelta(days=days) cutoff = datetime.now(timezone.utc) - timedelta(days=days)
provider_filter = (provider or "").strip() or None
session_status_filter = (session_status or "").strip() or None
session_query = select(VoiceSession).where(VoiceSession.user_id == user_id) session_query = select(VoiceSession).where(VoiceSession.user_id == user_id)
turn_query = ( turn_query = (
@@ -1215,10 +1221,30 @@ async def get_voice_session_analytics_service(
session_query = session_query.where(VoiceSession.created_at >= cutoff) session_query = session_query.where(VoiceSession.created_at >= cutoff)
turn_query = turn_query.where(VoiceTurn.created_at >= cutoff) turn_query = turn_query.where(VoiceTurn.created_at >= cutoff)
event_query = event_query.where(VoiceSessionEvent.created_at >= cutoff) event_query = event_query.where(VoiceSessionEvent.created_at >= cutoff)
if session_status_filter is not None:
session_query = session_query.where(VoiceSession.status == session_status_filter)
turn_query = turn_query.where(VoiceSession.status == session_status_filter)
event_query = event_query.where(VoiceSession.status == session_status_filter)
sessions = (await db.execute(session_query)).scalars().all() sessions = (await db.execute(session_query)).scalars().all()
turns = (await db.execute(turn_query)).scalars().all() turns = (await db.execute(turn_query)).scalars().all()
events = (await db.execute(event_query)).scalars().all() events = (await db.execute(event_query)).scalars().all()
if provider_filter is not None:
provider_turn_ids = {
turn.id
for turn in turns
if ((turn.story_patch or {}).get("transcription_provider") or "unknown")
== provider_filter
}
provider_session_ids = {turn.session_id for turn in turns if turn.id in provider_turn_ids}
sessions = [session for session in sessions if session.id in provider_session_ids]
turns = [turn for turn in turns if turn.id in provider_turn_ids]
events = [
event
for event in events
if event.turn_id in provider_turn_ids
or (event.turn_id is None and event.session_id in provider_session_ids)
]
session_summaries = [await _build_session_summary(db, session) for session in sessions] session_summaries = [await _build_session_summary(db, session) for session in sessions]
total_sessions = len(sessions) total_sessions = len(sessions)
@@ -1258,6 +1284,36 @@ async def get_voice_session_analytics_service(
safety_interventions = sum( safety_interventions = sum(
1 for event in events if event.event_type == "safety_intervention_requested" 1 for event in events if event.event_type == "safety_intervention_requested"
) )
text_fallback_turns = sum(
1 for turn in turns if (turn.story_patch or {}).get("transcription_provider") == "fallback"
)
uploaded_audio_turns = sum(1 for turn in turns if turn.user_audio_path)
assistant_audio_ready_turns = sum(
1 for turn in turns if session_audio_exists(turn.assistant_audio_path)
)
user_audio_durations = [
duration for turn in turns if (duration := turn.user_audio_duration_ms) is not None
]
assistant_audio_durations = [
duration for turn in turns if (duration := turn.assistant_audio_duration_ms) is not None
]
total_user_audio_duration_ms = sum(user_audio_durations)
total_assistant_audio_duration_ms = sum(assistant_audio_durations)
transcription_provider_counts: dict[str, int] = {}
for turn in turns:
provider = (turn.story_patch or {}).get("transcription_provider") or "unknown"
transcription_provider_counts[provider] = transcription_provider_counts.get(provider, 0) + 1
failure_event_counts: dict[str, int] = {}
for event in events:
if event.status != "failed":
continue
failure_event_counts[event.event_type] = failure_event_counts.get(event.event_type, 0) + 1
transcript_confidences = [
confidence for turn in turns if (confidence := turn.transcript_confidence) is not None
]
intent_confidences = [
confidence for turn in turns if (confidence := turn.intent_confidence) is not None
]
turn_success_rate = ( turn_success_rate = (
round(successful_turns / total_turns, 4) if total_turns else 0.0 round(successful_turns / total_turns, 4) if total_turns else 0.0
@@ -1265,9 +1321,27 @@ async def get_voice_session_analytics_service(
finalize_conversion_rate = ( finalize_conversion_rate = (
round(finalized_sessions / total_sessions, 4) if total_sessions else 0.0 round(finalized_sessions / total_sessions, 4) if total_sessions else 0.0
) )
confirmation_request_rate = (
round(low_confidence_turns / total_turns, 4) if total_turns else 0.0
)
user_audio_turn_rate = round(uploaded_audio_turns / total_turns, 4) if total_turns else 0.0
assistant_audio_ready_rate = (
round(assistant_audio_ready_turns / successful_turns, 4) if successful_turns else 0.0
)
asr_attempts = uploaded_audio_turns + asr_failures
asr_success_rate = round(uploaded_audio_turns / asr_attempts, 4) if asr_attempts else 0.0
tts_attempts = assistant_audio_ready_turns + tts_failures
tts_success_rate = (
round(assistant_audio_ready_turns / tts_attempts, 4) if tts_attempts else 0.0
)
safety_intervention_rate = (
round(safety_interventions / total_turns, 4) if total_turns else 0.0
)
return VoiceSessionAnalyticsResponse( return VoiceSessionAnalyticsResponse(
window_days=days, window_days=days,
provider=provider_filter,
session_status=session_status_filter,
total_sessions=total_sessions, total_sessions=total_sessions,
attention_sessions=attention_sessions, attention_sessions=attention_sessions,
confirmation_attention_sessions=confirmation_attention_sessions, confirmation_attention_sessions=confirmation_attention_sessions,
@@ -1283,6 +1357,40 @@ async def get_voice_session_analytics_service(
tts_failures=tts_failures, tts_failures=tts_failures,
low_confidence_turns=low_confidence_turns, low_confidence_turns=low_confidence_turns,
safety_interventions=safety_interventions, safety_interventions=safety_interventions,
text_fallback_turns=text_fallback_turns,
uploaded_audio_turns=uploaded_audio_turns,
user_audio_turn_rate=user_audio_turn_rate,
assistant_audio_ready_turns=assistant_audio_ready_turns,
assistant_audio_ready_rate=assistant_audio_ready_rate,
asr_success_rate=asr_success_rate,
tts_success_rate=tts_success_rate,
avg_transcript_confidence=(
round(sum(transcript_confidences) / len(transcript_confidences), 4)
if transcript_confidences
else 0.0
),
avg_intent_confidence=(
round(sum(intent_confidences) / len(intent_confidences), 4)
if intent_confidences
else 0.0
),
safety_intervention_rate=safety_intervention_rate,
failure_event_counts=failure_event_counts,
total_user_audio_duration_ms=total_user_audio_duration_ms,
avg_user_audio_duration_ms=(
round(total_user_audio_duration_ms / len(user_audio_durations), 2)
if user_audio_durations
else 0.0
),
total_assistant_audio_turns=len(assistant_audio_durations),
total_assistant_audio_duration_ms=total_assistant_audio_duration_ms,
avg_assistant_audio_duration_ms=(
round(total_assistant_audio_duration_ms / len(assistant_audio_durations), 2)
if assistant_audio_durations
else 0.0
),
transcription_provider_counts=transcription_provider_counts,
confirmation_request_rate=confirmation_request_rate,
turn_success_rate=turn_success_rate, turn_success_rate=turn_success_rate,
finalize_conversion_rate=finalize_conversion_rate, finalize_conversion_rate=finalize_conversion_rate,
) )

View File

@@ -10,6 +10,7 @@ from app.core.logging import get_logger
from app.db.database import _get_session_factory from app.db.database import _get_session_factory
from app.db.models import Story, StoryUniverse from app.db.models import Story, StoryUniverse
from app.services.achievement_extractor import extract_achievements from app.services.achievement_extractor import extract_achievements
from app.tasks.utils import run_with_disposed_engine
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -17,7 +18,7 @@ logger = get_logger(__name__)
@celery_app.task @celery_app.task
def extract_story_achievements(story_id: int, universe_id: str) -> None: def extract_story_achievements(story_id: int, universe_id: str) -> None:
"""Extract achievements and update universe.""" """Extract achievements and update universe."""
asyncio.run(_extract_story_achievements(story_id, universe_id)) asyncio.run(run_with_disposed_engine(_extract_story_achievements(story_id, universe_id)))
async def _extract_story_achievements(story_id: int, universe_id: str) -> None: async def _extract_story_achievements(story_id: int, universe_id: str) -> None:

View File

@@ -6,6 +6,7 @@ from app.core.celery_app import celery_app
from app.core.logging import get_logger from app.core.logging import get_logger
from app.db.database import _get_session_factory from app.db.database import _get_session_factory
from app.services.story_service import prune_story_audio_cache from app.services.story_service import prune_story_audio_cache
from app.tasks.utils import run_with_disposed_engine
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -21,7 +22,7 @@ def prune_story_audio_cache_task():
return await prune_story_audio_cache(session) return await prune_story_audio_cache(session)
try: try:
result = asyncio.run(_run()) result = asyncio.run(run_with_disposed_engine(_run()))
logger.info("prune_story_audio_cache_task_completed", **result) logger.info("prune_story_audio_cache_task_completed", **result)
return result return result
except Exception as exc: except Exception as exc:

View File

@@ -6,6 +6,7 @@ from app.core.celery_app import celery_app
from app.core.logging import get_logger from app.core.logging import get_logger
from app.db.database import _get_session_factory from app.db.database import _get_session_factory
from app.services.generation_jobs import mark_stale_generation_jobs from app.services.generation_jobs import mark_stale_generation_jobs
from app.tasks.utils import run_with_disposed_engine
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -22,7 +23,7 @@ def prune_stale_generation_jobs_task():
return await mark_stale_generation_jobs(session) return await mark_stale_generation_jobs(session)
try: try:
result = asyncio.run(_run()) result = asyncio.run(run_with_disposed_engine(_run()))
logger.info("prune_stale_generation_jobs_task_completed", **result) logger.info("prune_stale_generation_jobs_task_completed", **result)
return result return result
except Exception as exc: except Exception as exc:

View File

@@ -6,6 +6,7 @@ from app.core.celery_app import celery_app
from app.core.logging import get_logger from app.core.logging import get_logger
from app.db.database import _get_session_factory from app.db.database import _get_session_factory
from app.services.story_service import run_generation_job_service from app.services.story_service import run_generation_job_service
from app.tasks.utils import run_with_disposed_engine
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -22,7 +23,7 @@ def run_generation_workflow_task(job_id: str):
return await run_generation_job_service(job_id, session) return await run_generation_job_service(job_id, session)
try: try:
result = asyncio.run(_run()) result = asyncio.run(run_with_disposed_engine(_run()))
logger.info( logger.info(
"generation_workflow_task_completed", "generation_workflow_task_completed",
job_id=job_id, job_id=job_id,

View File

@@ -5,6 +5,7 @@ from app.core.celery_app import celery_app
from app.core.logging import get_logger from app.core.logging import get_logger
from app.db.database import _get_session_factory from app.db.database import _get_session_factory
from app.services.memory_service import prune_expired_memories from app.services.memory_service import prune_expired_memories
from app.tasks.utils import run_with_disposed_engine
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -21,7 +22,7 @@ def prune_memories_task():
try: try:
# Create a new event loop for this task execution # Create a new event loop for this task execution
count = asyncio.run(_run()) count = asyncio.run(run_with_disposed_engine(_run()))
logger.info("prune_memories_task_completed", deleted_count=count) logger.info("prune_memories_task_completed", deleted_count=count)
return f"Deleted {count} expired memories" return f"Deleted {count} expired memories"
except Exception as exc: except Exception as exc:

View File

@@ -10,6 +10,7 @@ from app.core.celery_app import celery_app
from app.core.logging import get_logger from app.core.logging import get_logger
from app.db.database import _get_session_factory from app.db.database import _get_session_factory
from app.db.models import PushConfig, PushEvent from app.db.models import PushConfig, PushEvent
from app.tasks.utils import run_with_disposed_engine
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -22,7 +23,7 @@ TRIGGER_WINDOW_MINUTES = 30
@celery_app.task @celery_app.task
def check_push_notifications() -> None: def check_push_notifications() -> None:
"""Check push configs and create push events.""" """Check push configs and create push events."""
asyncio.run(_check_push_notifications()) asyncio.run(run_with_disposed_engine(_check_push_notifications()))
def _is_quiet_hours(current: time) -> bool: def _is_quiet_hours(current: time) -> bool:

View File

@@ -0,0 +1,17 @@
"""Shared helpers for Celery tasks."""
from collections.abc import Awaitable
from typing import TypeVar
from app.db.database import dispose_engine
T = TypeVar("T")


async def run_with_disposed_engine(awaitable: Awaitable[T]) -> T:
    """Run async task work and drop DB pools before the event loop closes.

    Args:
        awaitable: The task work to await.

    Returns:
        Whatever ``awaitable`` resolves to.

    ``dispose_engine()`` is awaited in a ``finally`` clause, so it runs whether
    the work succeeds or raises; an exception from the work propagates after
    the cleanup has run.
    """
    try:
        return await awaitable
    finally:
        await dispose_engine()

View File

@@ -283,3 +283,6 @@ async def test_admin_provider_analytics_support_days_and_capability_filters(
assert data["job_count"] == 1 assert data["job_count"] == 1
assert data["story_count"] == 1 assert data["story_count"] == 1
assert data["failure_reasons"] == [{"reason": "timeout", "count": 1}] assert data["failure_reasons"] == [{"reason": "timeout", "count": 1}]
response = await client.get("/admin/providers/analytics?capability=unknown")
assert response.status_code == 422

View File

@@ -342,6 +342,7 @@ async def test_voice_session_low_confidence_turn_requests_confirmation(
files={ files={
"audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"), "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"),
}, },
data={"duration_ms": "1200"},
) )
assert response.status_code == 202 assert response.status_code == 202
turn_id = response.json()["turn_id"] turn_id = response.json()["turn_id"]
@@ -431,6 +432,7 @@ async def test_voice_session_confirmation_accept_continues_original_turn(
files={ files={
"audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"), "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"),
}, },
data={"duration_ms": "1200"},
) )
turn_id = response.json()["turn_id"] turn_id = response.json()["turn_id"]
@@ -503,6 +505,7 @@ async def test_voice_session_confirmation_switch_to_text_allows_follow_up_turn(
files={ files={
"audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"), "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"),
}, },
data={"duration_ms": "1200"},
) )
turn_id = response.json()["turn_id"] turn_id = response.json()["turn_id"]
@@ -647,6 +650,7 @@ async def test_voice_session_analytics_summarize_failures_and_confirmations(
files={ files={
"audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"), "audio_file": ("turn.webm", b"fake-webm-audio", "audio/webm"),
}, },
data={"duration_ms": "1200"},
) )
turn_id = response.json()["turn_id"] turn_id = response.json()["turn_id"]
await client.post( await client.post(
@@ -677,6 +681,46 @@ async def test_voice_session_analytics_summarize_failures_and_confirmations(
assert analytics["asr_failures"] >= 1 assert analytics["asr_failures"] >= 1
assert analytics["finalized_sessions"] >= 1 assert analytics["finalized_sessions"] >= 1
assert analytics["finalize_conversion_rate"] > 0 assert analytics["finalize_conversion_rate"] > 0
assert analytics["text_fallback_turns"] >= 1
assert analytics["uploaded_audio_turns"] >= 1
assert analytics["user_audio_turn_rate"] > 0
assert analytics["assistant_audio_ready_turns"] >= 1
assert analytics["assistant_audio_ready_rate"] > 0
assert analytics["asr_success_rate"] > 0
assert analytics["tts_success_rate"] > 0
assert analytics["avg_transcript_confidence"] > 0
assert analytics["avg_intent_confidence"] > 0
assert analytics["failure_event_counts"]["turn_transcription_failed"] >= 1
assert analytics["failure_event_counts"]["assistant_audio_failed"] >= 1
assert analytics["total_user_audio_duration_ms"] >= 1200
assert analytics["avg_user_audio_duration_ms"] >= 1200
assert analytics["transcription_provider_counts"]["openai"] >= 1
assert analytics["transcription_provider_counts"]["fallback"] >= 1
assert analytics["confirmation_request_rate"] > 0
response = await client.get(
"/api/voice-sessions/analytics?days=30&provider=openai"
)
assert response.status_code == 200
provider_analytics = response.json()
assert provider_analytics["provider"] == "openai"
assert provider_analytics["uploaded_audio_turns"] >= 1
assert provider_analytics["text_fallback_turns"] == 0
assert set(provider_analytics["transcription_provider_counts"]) == {"openai"}
response = await client.get(
"/api/voice-sessions/analytics?days=30&session_status=completed"
)
assert response.status_code == 200
status_analytics = response.json()
assert status_analytics["session_status"] == "completed"
assert status_analytics["total_sessions"] >= 1
assert status_analytics["finalized_sessions"] >= 1
response = await client.get(
"/api/voice-sessions/analytics?days=30&session_status=unknown"
)
assert response.status_code == 422
finally: finally:
app.dependency_overrides.clear() app.dependency_overrides.clear()

View File

@@ -79,6 +79,8 @@ SMOKE_AUDIO=1 SMOKE_VOICE=1 ./scripts/demo_smoke.sh
- [ ] `/api/audio/{story_id}/status` 能查询音频缓存状态且不触发生成 - [ ] `/api/audio/{story_id}/status` 能查询音频缓存状态且不触发生成
- [ ] 如果启用 `SMOKE_AUDIO=1`,音频 retry 后 `audio_status=ready` - [ ] 如果启用 `SMOKE_AUDIO=1`,音频 retry 后 `audio_status=ready`
- [ ] 如果启用 `SMOKE_VOICE=1`,语音共创会话可完成文本 fallback、上传回合、analytics 和 finalize 到 Story - [ ] 如果启用 `SMOKE_VOICE=1`,语音共创会话可完成文本 fallback、上传回合、analytics 和 finalize 到 Story
- [ ] 如果启用 `SMOKE_VOICE=1`,analytics 返回输入构成、语音时长、Provider 分布、ASR/TTS 成功率和低置信度确认率
- [ ] 如果启用 `SMOKE_VOICE=1`,analytics 支持按 `provider` 和 `session_status` 筛选
- [ ] 验证结果已记录到 `docs/planning/demo-validation-log.md` - [ ] 验证结果已记录到 `docs/planning/demo-validation-log.md`
--- ---

View File

@@ -128,3 +128,158 @@ SMOKE_AUDIO=1 ./scripts/demo_smoke.sh
限制: 限制:
- 本机浏览器自动化脚本默认寻找标准版 Chrome当前电脑安装的是 Google Chrome Beta所以本轮没有生成 CDP 截图。 - 本机浏览器自动化脚本默认寻找标准版 Chrome当前电脑安装的是 Google Chrome Beta所以本轮没有生成 CDP 截图。
## 2026-04-24 语音共创 Alpha 观测补强
- 今日优先级:先收束 Phase A Alpha 的可解释性,不进入 Phase B 实时化。
- 后端 `VoiceTurnSummaryResponse` 已返回用户/助手音频时长,便于定位单轮录音质量与 TTS 产物状态。
- 后端 `VoiceSessionAnalyticsResponse` 已新增用户语音总时长、平均时长、助手音频统计、转写 Provider 分布和低置信度确认率。
- 用户端 Voice Studio 观测卡片已展示平均用户语音时长、转写来源分布和确认率。
- `SMOKE_VOICE=1 ./scripts/demo_smoke.sh` 已新增语音时长与转写 Provider 分布断言。
验证命令:
```bash
cd backend && .venv/bin/pytest tests/test_voice_sessions.py -q
cd frontend && npm run build
```
结果:
- `tests/test_voice_sessions.py` 通过(15 passed),保留 1 个 SQLAlchemy/SQLite `datetime.utcnow()` 上游 deprecation warning。
- 用户端 `vue-tsc && vite build` 通过,保留 `baseline-browser-mapping` 数据偏旧提示。
## 2026-04-24 语音共创 Alpha 50 项执行池与 P1 观测扩展
- PRD 已新增 Phase A Alpha 50 项执行 Backlog明确 P0/P1/P2、验收口径和今日执行策略。
- 后端 voice analytics 已扩展输入构成、上传语音占比、助手语音覆盖率、ASR/TTS 成功率、平均转写/意图置信度、安全介入率和失败事件分布。
- Voice Studio 已展示上传/文本构成、助手语音覆盖、ASR/TTS 成功率、平均置信度、用户/助手平均语音时长,并在单 turn 卡片展示用户/助手语音时长。
- `SMOKE_VOICE=1` smoke 已新增输入构成与 ASR/TTS 成功率断言。
- 技术方案与 demo checklist 已同步语音观测字段。
验证命令:
```bash
cd backend && .venv/bin/pytest tests/test_voice_sessions.py -q
cd backend && .venv/bin/ruff check app/schemas/voice_session_schemas.py app/services/voice_session_service.py tests/test_voice_sessions.py
cd frontend && npm run build
```
结果:
- `tests/test_voice_sessions.py` 通过(15 passed),保留 1 个 SQLAlchemy/SQLite `datetime.utcnow()` 上游 deprecation warning。
- `ruff check` 通过。
- 用户端 `vue-tsc && vite build` 通过,保留 `baseline-browser-mapping` 数据偏旧提示。
## 2026-04-24 语音共创 P2 样本与列表摘要补充
- Voice Studio 最近会话列表已增加轻量状态摘要:待确认、安全介入、最近意图或等待输入。
- PRD 已补 10 条儿童表达样本和 2 版低置信度确认文案草案,用于后续 Alpha 人工验收。
- 代码自审结论:本轮没有新增数据库迁移;新增字段均为响应层兼容扩展,前端使用空值兜底;smoke 断言只在 `SMOKE_VOICE=1` 路径生效,不影响默认演示。
复验命令:
```bash
cd frontend && npm run build
cd backend && .venv/bin/pytest tests/test_voice_sessions.py -q
cd backend && .venv/bin/ruff check app/schemas/voice_session_schemas.py app/services/voice_session_service.py tests/test_voice_sessions.py
```
结果:
- 用户端 `vue-tsc && vite build` 通过。
- `tests/test_voice_sessions.py` 通过(15 passed),保留 1 个 SQLAlchemy/SQLite `datetime.utcnow()` 上游 deprecation warning。
- `ruff check` 通过。
## 2026-04-25 语音 Analytics Provider/Status 过滤开发
- 后端 `GET /api/voice-sessions/analytics` 新增 `provider` 和 `session_status` 查询参数。
- analytics 响应新增当前筛选条件回显:`provider` 和 `session_status`。
- Voice Studio 观测卡新增转写来源与会话状态筛选控件。
- `SMOKE_VOICE=1` 已新增 provider/status 过滤断言。
- 技术方案、demo checklist、PRD 执行状态已同步。
验证命令:
```bash
cd backend && .venv/bin/pytest tests/test_voice_sessions.py -q
cd backend && .venv/bin/ruff check app/api/voice_sessions.py app/schemas/voice_session_schemas.py app/services/voice_session_service.py tests/test_voice_sessions.py
cd frontend && npm run build
```
结果:
- `tests/test_voice_sessions.py` 通过(15 passed),保留 1 个 SQLAlchemy/SQLite `datetime.utcnow()` 上游 deprecation warning。
- `ruff check` 通过。
- 用户端 `vue-tsc && vite build` 通过,保留 `baseline-browser-mapping` 数据偏旧提示。
## 2026-04-25 Warning 与前端依赖安全收敛
- 后端移除 `datetime.utcnow()`:Provider admin models、cost tracker、provider metrics 已改为 timezone-aware UTC 时间。
- `tests/test_voice_sessions.py` 不再输出 SQLAlchemy/SQLite `datetime.utcnow()` deprecation warning。
- 前端更新 `baseline-browser-mapping`,`npm run build` 不再输出 Baseline 数据过期提示。
- 执行非破坏性 `npm audit fix` 后,用户端生产依赖 `npm audit --omit=dev` 为 0 vulnerabilities。
验证命令:
```bash
cd backend && .venv/bin/pytest tests/test_admin_providers.py tests/test_voice_sessions.py -q
cd backend && .venv/bin/ruff check app/db/admin_models.py app/services/cost_tracker.py app/services/provider_metrics.py app/api/voice_sessions.py app/schemas/voice_session_schemas.py app/services/voice_session_service.py tests/test_voice_sessions.py
cd frontend && npm audit --omit=dev
cd frontend && npm run build
```
结果:
- `tests/test_admin_providers.py tests/test_voice_sessions.py` 通过(17 passed)。
- `ruff check` 通过。
- `npm audit --omit=dev` 返回 0 vulnerabilities。
- 用户端 `vue-tsc && vite build` 通过。
## 2026-04-25 行尾噪音收敛与 Admin Analytics 校验
- 已撤回高噪音 CRLF / lockfile 变更,当前 diff 保留在语音 analytics、Voice Studio、测试、smoke、文档和低噪音 admin models 修复范围内。
- 后端 admin provider analytics 的 `capability` 参数已收紧为 `text/image/tts/storybook/asr` 枚举,无效能力返回 `422`
- 语音 analytics 的 `session_status` 参数已收紧为明确会话状态枚举,无效状态返回 `422`
验证命令:
```bash
cd backend && .venv/bin/ruff check app/api/admin_providers.py app/api/voice_sessions.py app/db/admin_models.py app/schemas/voice_session_schemas.py app/services/voice_session_service.py tests/test_admin_providers.py tests/test_voice_sessions.py
cd backend && .venv/bin/pytest tests/test_admin_providers.py tests/test_voice_sessions.py -q
cd frontend && npm run build
```
结果:
- `ruff check` 通过。
- `tests/test_admin_providers.py tests/test_voice_sessions.py` 通过(17 passed)。
- 用户端 `vue-tsc && vite build` 通过。
## 2026-04-25 Docker Voice Smoke 回归闭环
- Docker 栈已用当前代码重建,backend、backend-admin、worker、celery-beat、frontend、frontend-admin 均可启动。
- 修复 Celery task 注册不完整问题:worker 现在注册 generation workflow、generation maintenance、audio cache、memory、push 和 achievements 任务。
- 修复 worker 冷启动 DB session factory 自锁:数据库锁改为可重入锁。
- 修复 Celery async task 跨 event loop 复用 asyncpg 连接问题:任务结束时 dispose async engine。
- `SMOKE_VOICE=1` smoke 对齐当前 intent/event 命名,并使用非空临时 demo audio 上传样本。
验证命令:
```bash
cd backend && .venv/bin/python -m ruff check app/db/database.py app/core/celery_app.py app/tasks
cd backend && .venv/bin/python -m pytest tests/test_admin_providers.py tests/test_voice_sessions.py -q
cd frontend && npm run build
cd admin-frontend && npm run build
docker compose up -d --build
SMOKE_VOICE=1 ./scripts/demo_smoke.sh
```
结果:
- `ruff check` 通过。
- `tests/test_admin_providers.py tests/test_voice_sessions.py` 通过(17 passed)。
- 用户端 `vue-tsc && vite build` 通过。
- 管理端 `vue-tsc && vite build` 通过,仍有 `baseline-browser-mapping` 数据偏旧提示。
- `docker compose up -d --build` 通过,当前本地服务可访问 `http://localhost:52080` 和 `http://localhost:52888`。
- `SMOKE_VOICE=1 ./scripts/demo_smoke.sh` 完整通过,覆盖普通故事、语音共创文本 fallback、上传回合、voice analytics、provider/status 筛选、finalize、绘本、资产重试、provider analytics 与 ops summary。

View File

@@ -556,6 +556,71 @@ DreamWeaver 的语音共创模式应当成为一种“孩子可以开口参与
--- ---
## Phase A Alpha 50-Task Execution Backlog2026-04-24
> 目标:先把语音共创 Alpha 做到“可演示、可解释、可复验”,再进入 Phase B 实时化。以下 50 项按今天可连续推进的优先级排列;实现时优先选择无需新迁移、风险低、能用测试和 smoke 验证的任务。
| # | Priority | Area | Task | Acceptance |
| --- | --- | --- | --- | --- |
| 01 | P0 | PRD | 固化 50 项 Alpha 执行池 | PRD 中能看到任务、优先级、验收口径 |
| 02 | P0 | Analytics | turn summary 返回用户录音时长 | `GET /turns/{id}` 返回 `user_audio_duration_ms` |
| 03 | P0 | Analytics | turn summary 返回助手音频时长 | `GET /turns/{id}` 返回 `assistant_audio_duration_ms` |
| 04 | P0 | Analytics | voice analytics 返回用户语音总时长 | analytics 有 `total_user_audio_duration_ms` |
| 05 | P0 | Analytics | voice analytics 返回用户平均语音时长 | analytics 有 `avg_user_audio_duration_ms` |
| 06 | P0 | Analytics | voice analytics 返回转写 Provider 分布 | analytics 有 `transcription_provider_counts` |
| 07 | P0 | Analytics | voice analytics 返回低置信度确认率 | analytics 有 `confirmation_request_rate` |
| 08 | P0 | Frontend | Voice Studio 展示平均用户语音时长 | 观测卡片可见平均秒数 |
| 09 | P0 | Frontend | Voice Studio 展示转写来源分布 | 观测卡片可见 fallback/demo/openai 次数 |
| 10 | P0 | Frontend | Voice Studio 展示确认率 | 低置信度卡片显示确认率 |
| 11 | P0 | Smoke | `SMOKE_VOICE=1` 断言上传回合时长 | smoke 检查 `user_audio_duration_ms` |
| 12 | P0 | Smoke | `SMOKE_VOICE=1` 断言 Provider 分布 | smoke 检查 demo/fallback 次数 |
| 13 | P0 | Tests | 增加 analytics 时长测试 | `test_voice_sessions.py` 覆盖新增字段 |
| 14 | P0 | Tests | 增加 Provider 分布测试 | 测试覆盖 fallback/openai 分布 |
| 15 | P0 | Tests | 增加确认率测试 | 测试覆盖 `confirmation_request_rate` |
| 16 | P1 | Analytics | 统计文本 fallback turn 数 | analytics 有 `text_fallback_turns` |
| 17 | P1 | Analytics | 统计上传音频 turn 数 | analytics 有 `uploaded_audio_turns` |
| 18 | P1 | Analytics | 统计用户语音 turn 占比 | analytics 有 `user_audio_turn_rate` |
| 19 | P1 | Analytics | 统计助手音频 ready turn 数 | analytics 有 `assistant_audio_ready_turns` |
| 20 | P1 | Analytics | 统计助手音频 ready 率 | analytics 有 `assistant_audio_ready_rate` |
| 21 | P1 | Analytics | 统计 ASR 成功率 | analytics 有 `asr_success_rate` |
| 22 | P1 | Analytics | 统计 TTS 成功率 | analytics 有 `tts_success_rate` |
| 23 | P1 | Analytics | 统计平均转写置信度 | analytics 有 `avg_transcript_confidence` |
| 24 | P1 | Analytics | 统计平均意图置信度 | analytics 有 `avg_intent_confidence` |
| 25 | P1 | Analytics | 统计安全介入率 | analytics 有 `safety_intervention_rate` |
| 26 | P1 | Analytics | 统计语音失败事件分布 | analytics 有 `failure_event_counts` |
| 27 | P1 | Frontend | Voice Studio 展示 fallback/upload turn 数 | 观测卡片可见输入构成 |
| 28 | P1 | Frontend | Voice Studio 展示助手音频 ready 率 | 观测卡片可见 TTS 产物覆盖 |
| 29 | P1 | Frontend | Voice Studio 展示 ASR/TTS 成功率 | 观测卡片文案可见成功率 |
| 30 | P1 | Frontend | Voice Studio 展示平均置信度 | 观测卡片文案可见转写/意图均值 |
| 31 | P1 | Frontend | Turn 卡片展示用户录音时长 | 单轮卡片可解释录音长度 |
| 32 | P1 | Frontend | Turn 卡片展示助手音频时长 | 单轮卡片可解释 TTS 产物长度 |
| 33 | P1 | Smoke | `SMOKE_VOICE=1` 断言输入构成 | smoke 检查 fallback/upload 计数 |
| 34 | P1 | Smoke | `SMOKE_VOICE=1` 断言成功率字段 | smoke 检查 ASR/TTS/assistant audio 率 |
| 35 | P1 | Tests | 增加输入构成测试 | 后端测试覆盖 fallback/upload 计数 |
| 36 | P1 | Tests | 增加音频 ready 率测试 | 后端测试覆盖 assistant audio ready |
| 37 | P1 | Tests | 增加平均置信度测试 | 后端测试覆盖 confidence 均值 |
| 38 | P1 | Docs | 更新技术方案 analytics 字段 | tech spec 与接口一致 |
| 39 | P1 | Docs | 更新 demo checklist 观测项 | checklist 包含语音观测字段 |
| 40 | P1 | Docs | 更新 validation log | 日志记录命令与结果 |
| 41 | P2 | Product | 真实儿童表达样本集 | 至少 10 条样本进入验收文档 |
| 42 | P2 | Product | 低置信度文案 A/B 草案 | 输出两版确认文案 |
| 43 | P2 | Frontend | 移动端确认卡密度优化 | 小屏按钮不拥挤 |
| 44 | P2 | Frontend | 会话列表显示观测摘要 | 列表可见需处理原因和输入模式 |
| 45 | P2 | Backend | 支持 analytics 按 provider 过滤 | query 可筛选 provider |
| 46 | P2 | Backend | 支持 analytics 按 status 过滤 | query 可筛选会话状态 |
| 47 | P2 | Ops | ASR Provider 管理端摘要 | admin 侧可见 ASR 调用情况 |
| 48 | P2 | QA | Docker voice smoke 回归 | Docker 栈下 `SMOKE_VOICE=1` 通过 |
| 49 | P2 | Review | 自审语音服务复杂度 | 列出可拆分函数和风险点 |
| 50 | P2 | Review | 自审演示口径一致性 | PRD、tech spec、checklist 口径一致 |
### 今日执行策略
- 先完成 #01-#40 中无需数据库迁移的观测与验收项。
- #41-#50 作为后续产品化和演示质量任务,不阻塞今天的 Alpha 收束。
- 每批完成后必须跑后端语音测试、前端 build、ruff并追加验证日志。
## Success Metrics ## Success Metrics
### Product Metrics ### Product Metrics
@@ -600,3 +665,38 @@ DreamWeaver 的语音共创模式应当成为一种“孩子可以开口参与
4. 复用现有生成主干,新增 voice session 层,而不是另起一套平行系统 4. 复用现有生成主干,新增 voice session 层,而不是另起一套平行系统
这样既能保持当前 PRD 主线不被打断,也能确保后续做语音共创时,我们是在按计划推进,而不是临时起意。 这样既能保持当前 PRD 主线不被打断,也能确保后续做语音共创时,我们是在按计划推进,而不是临时起意。
## Phase A Alpha Child Expression SamplesP2 Seed
这些样本用于后续补齐真实儿童表达验收,不作为模型提示词硬编码。
| # | Sample | Expected Intent | Review Focus |
| --- | --- | --- | --- |
| 01 | 我想听小熊和星星找家的故事 | start_story | 能否抓住主角与目标 |
| 02 | 不要让小熊害怕,让月亮姐姐帮它 | correct_story | 修正是否接上上一轮 |
| 03 | 然后小狐狸也来了,它带了饼干 | continue_story | 新角色是否自然进入 |
| 04 | 我不喜欢黑黑的森林,换成彩虹森林 | correct_story | 负面场景是否温和替换 |
| 05 | 让恐龙变小一点,不要踩坏花 | correct_story | 安全和教育主题是否保留 |
| 06 | 再讲一段,它们坐上云朵船 | continue_story | 奇幻想象是否延续 |
| 07 | 结束吧,我想保存这个故事 | save_story | 是否引导 finalize |
| 08 | 先停一下,我等会再讲 | end_story | 是否保持会话可恢复 |
| 09 | 它们可以一起道歉吗 | continue_story | 是否融入教育主题 |
| 10 | 我刚才说错了,不是兔子,是小猫 | correct_story | 指代修正是否准确 |
## Phase A Alpha Confirmation Copy OptionsP2 Seed
- 版本 A(更温柔):`我刚才听到的是「{summary}」。如果听对了,我们就按这个继续;如果不对,可以重说一遍或改成文字。`
- 版本 B(更高效):`本轮系统理解为「{summary}」。请家长确认:继续、重说,或改成文本输入。`
默认建议继续使用版本 B,因为 Alpha 演示时更短、更容易解释系统状态。
## Phase A Alpha Execution Update2026-04-25
本轮继续推进真实开发任务,而不是只维护任务池:
- 已完成 #45:voice analytics 支持 `provider` 查询参数,可按转写来源筛选 turn、事件和会话集合。
- 已完成 #46:voice analytics 支持 `session_status` 查询参数,可按会话状态筛选统计窗口。
- 已扩展 Voice Studio 观测卡:支持转写来源和会话状态筛选,便于演示时解释 demo/fallback/真实 ASR 差异。
- 已扩展 `SMOKE_VOICE=1`:增加 provider/status 过滤断言,避免 analytics 只验证全量路径。
后续仍未完成:#47 ASR Provider 管理端摘要、#48 Docker voice smoke 回归、#49 服务复杂度拆分、#50 演示口径最终复核。

View File

@@ -28,7 +28,7 @@
- 低置信度确认链路已有后端测试覆盖,可作为下一阶段继续接 ASR 与更细确认交互的基础 - 低置信度确认链路已有后端测试覆盖,可作为下一阶段继续接 ASR 与更细确认交互的基础
- 已新增用户转写安全检查、assistant 输出柔性改写与 `safety_flags` 事件记录 - 已新增用户转写安全检查、assistant 输出柔性改写与 `safety_flags` 事件记录
- finalize 会生成更稳定的标题/摘要,并在条件允许时自动排队封面补全 job - finalize 会生成更稳定的标题/摘要,并在条件允许时自动排队封面补全 job
- 已新增 `voice session analytics` 聚合指标,可跟踪 turn 成功率、ASR/TTS 失败、低置信度触发finalize 转化率 - 已新增 `voice session analytics` 聚合指标,可跟踪 turn 成功率、ASR/TTS 失败、低置信度触发finalize 转化率、输入构成、语音时长、Provider 分布、确认率和平均置信度,并支持按转写 Provider 与会话状态筛选
- `voice session finalize` 现在会返回可追踪的 `generation_job_id`,让正式 Story 资产补全重新接回现有 generation trace 主干 - `voice session finalize` 现在会返回可追踪的 `generation_job_id`,让正式 Story 资产补全重新接回现有 generation trace 主干
- 语音共创触发的 `asset_generation` job 现在也支持沿用统一 generation job 的取消 / 重试控制 - 语音共创触发的 `asset_generation` job 现在也支持沿用统一 generation job 的取消 / 重试控制

View File

@@ -6,6 +6,7 @@ export interface VoiceTurnSummary {
user_transcript: string | null user_transcript: string | null
transcript_confidence: number | null transcript_confidence: number | null
transcription_provider: string | null transcription_provider: string | null
user_audio_duration_ms: number | null
detected_intent: string detected_intent: string
intent_confidence: number | null intent_confidence: number | null
understanding_summary: string | null understanding_summary: string | null
@@ -17,6 +18,7 @@ export interface VoiceTurnSummary {
safety_blocked: boolean safety_blocked: boolean
safety_message: string | null safety_message: string | null
assistant_text: string | null assistant_text: string | null
assistant_audio_duration_ms: number | null
assistant_audio_ready: boolean assistant_audio_ready: boolean
assistant_audio_url: string | null assistant_audio_url: string | null
user_audio_ready: boolean user_audio_ready: boolean
@@ -81,6 +83,8 @@ export interface VoiceTurnAcceptedResponse {
export interface VoiceSessionAnalytics { export interface VoiceSessionAnalytics {
window_days: number | null window_days: number | null
provider: string | null
session_status: string | null
total_sessions: number total_sessions: number
attention_sessions: number attention_sessions: number
confirmation_attention_sessions: number confirmation_attention_sessions: number
@@ -96,6 +100,24 @@ export interface VoiceSessionAnalytics {
tts_failures: number tts_failures: number
low_confidence_turns: number low_confidence_turns: number
safety_interventions: number safety_interventions: number
text_fallback_turns: number
uploaded_audio_turns: number
user_audio_turn_rate: number
assistant_audio_ready_turns: number
assistant_audio_ready_rate: number
asr_success_rate: number
tts_success_rate: number
avg_transcript_confidence: number
avg_intent_confidence: number
safety_intervention_rate: number
failure_event_counts: Record<string, number>
total_user_audio_duration_ms: number
avg_user_audio_duration_ms: number
total_assistant_audio_turns: number
total_assistant_audio_duration_ms: number
avg_assistant_audio_duration_ms: number
transcription_provider_counts: Record<string, number>
confirmation_request_rate: number
turn_success_rate: number turn_success_rate: number
finalize_conversion_rate: number finalize_conversion_rate: number
} }

View File

@@ -80,6 +80,8 @@ const selectedUniverseId = ref('')
const sessionFilter = ref<SessionFilter>('active') const sessionFilter = ref<SessionFilter>('active')
const attentionReasonFilter = ref<AttentionReasonFilter>('all') const attentionReasonFilter = ref<AttentionReasonFilter>('all')
const analyticsWindow = ref<'7' | '30' | 'all'>('30') const analyticsWindow = ref<'7' | '30' | 'all'>('30')
const analyticsProviderFilter = ref('')
const analyticsStatusFilter = ref('')
const textTurnInput = ref('') const textTurnInput = ref('')
const uploadTranscriptHint = ref('') const uploadTranscriptHint = ref('')
const loadingSessions = ref(false) const loadingSessions = ref(false)
@@ -113,10 +115,30 @@ const profileOptions = computed(() =>
const universeOptions = computed(() => const universeOptions = computed(() =>
universes.value.map((universe) => ({ value: universe.id, label: universe.name })), universes.value.map((universe) => ({ value: universe.id, label: universe.name })),
) )
// Choices for the analytics "transcription provider" filter select.
// Keys are the provider values sent as the `provider` query parameter;
// values are the labels displayed to the user.
const analyticsProviderOptions = Object.entries({
  fallback: '文本 fallback',
  demo: 'Demo ASR',
  openai: 'OpenAI ASR',
  openai_asr: 'OpenAI ASR Adapter',
}).map(([value, label]) => ({ value, label }))
// Choices for the analytics "session status" filter select.
// Keys are the status values sent as the `session_status` query parameter;
// values are the labels displayed to the user.
const analyticsStatusOptions = Object.entries({
  draft: '草稿',
  active: '进行中',
  waiting_user: '等待用户',
  completed: '已完成',
  abandoned: '已放弃',
}).map(([value, label]) => ({ value, label }))
const filteredSessions = computed(() => { const filteredSessions = computed(() => {
return resolveDisplayedSessions(sessions.value) return resolveDisplayedSessions(sessions.value)
}) })
/**
 * Build the one-line status summary shown under a session in the list.
 *
 * Checks are ordered by priority: a pending confirmation wins, then the
 * number of safety flags on the latest turn, then the latest detected
 * intent; otherwise the session is reported as waiting for input.
 */
const getSessionInputModeSummary = (session: VoiceSessionSummary) => {
  const {
    latest_requires_confirmation: needsConfirmation,
    latest_safety_flags: safetyFlags,
    latest_detected_intent: latestIntent,
  } = session

  if (needsConfirmation) {
    return '上一轮待确认'
  }

  const flagCount = safetyFlags.length
  if (flagCount) {
    return `安全介入 ${flagCount}`
  }

  return latestIntent ? `最近意图:${formatIntent(latestIntent)}` : '等待输入'
}
const activeTurnList = computed(() => activeSession.value?.recent_turns ?? []) const activeTurnList = computed(() => activeSession.value?.recent_turns ?? [])
const hasPendingConfirmation = computed(() => activeSession.value?.latest_requires_confirmation ?? false) const hasPendingConfirmation = computed(() => activeSession.value?.latest_requires_confirmation ?? false)
const latestPendingConfirmationTurn = computed( const latestPendingConfirmationTurn = computed(
@@ -269,6 +291,50 @@ const finalizeConversionRateLabel = computed(() => {
if (!voiceAnalytics.value) return '0%' if (!voiceAnalytics.value) return '0%'
return `${Math.round(voiceAnalytics.value.finalize_conversion_rate * 100)}%` return `${Math.round(voiceAnalytics.value.finalize_conversion_rate * 100)}%`
}) })
/**
 * Format a 0..1 ratio as a whole-number percent label.
 *
 * Returns '0%' when the ratio is missing or not a finite number, so a
 * response without the field renders as '0%' instead of 'NaN%'.
 */
function formatVoiceRatePercent(rate: number | null | undefined): string {
  if (typeof rate !== 'number' || !Number.isFinite(rate)) return '0%'
  return `${Math.round(rate * 100)}%`
}

// Share of turns that asked the user to confirm.
const confirmationRequestRateLabel = computed(() =>
  formatVoiceRatePercent(voiceAnalytics.value?.confirmation_request_rate),
)
// Share of turns submitted as user audio.
const userAudioTurnRateLabel = computed(() =>
  formatVoiceRatePercent(voiceAnalytics.value?.user_audio_turn_rate),
)
// Share of turns whose assistant audio is ready.
const assistantAudioReadyRateLabel = computed(() =>
  formatVoiceRatePercent(voiceAnalytics.value?.assistant_audio_ready_rate),
)
// ASR success rate.
const asrSuccessRateLabel = computed(() =>
  formatVoiceRatePercent(voiceAnalytics.value?.asr_success_rate),
)
// TTS success rate.
const ttsSuccessRateLabel = computed(() =>
  formatVoiceRatePercent(voiceAnalytics.value?.tts_success_rate),
)
// Combined label for the average transcript and intent confidence,
// each shown as a rounded percentage; zeros when analytics are not loaded.
const avgConfidenceSummary = computed(() => {
  const analytics = voiceAnalytics.value
  if (!analytics) {
    return '转写 0%,意图 0%'
  }
  const toPercent = (confidence: number) => Math.round(confidence * 100)
  const transcriptPercent = toPercent(analytics.avg_transcript_confidence)
  const intentPercent = toPercent(analytics.avg_intent_confidence)
  return `转写 ${transcriptPercent}%,意图 ${intentPercent}%`
})
// Average user audio duration in seconds with one decimal place.
// Both branches carry the " 秒" unit so the label reads the same whether
// or not a duration is available.
const avgUserAudioDurationLabel = computed(() => {
  const averageMs = voiceAnalytics.value?.avg_user_audio_duration_ms
  if (!averageMs) return '0.0 秒'
  return `${(averageMs / 1000).toFixed(1)} 秒`
})
// Average assistant audio duration in seconds with one decimal place.
// Both branches carry the " 秒" unit so the label reads the same whether
// or not a duration is available.
const avgAssistantAudioDurationLabel = computed(() => {
  const averageMs = voiceAnalytics.value?.avg_assistant_audio_duration_ms
  if (!averageMs) return '0.0 秒'
  return `${(averageMs / 1000).toFixed(1)} 秒`
})
/**
 * Format a millisecond duration as seconds with one decimal place.
 *
 * A missing or zero duration yields '0.0 秒'. The non-zero branch appends
 * the same " 秒" unit so callers never receive a bare number.
 */
const formatDurationMs = (durationMs: number | null | undefined) => {
  if (!durationMs) return '0.0 秒'
  return `${(durationMs / 1000).toFixed(1)} 秒`
}
// Summary of transcription providers, highest count first,
// e.g. "openai 3、fallback 2". Entries are joined with a list separator;
// an empty separator would run them together ("openai 3fallback 2").
const transcriptionProviderSummary = computed(() => {
  const counts = voiceAnalytics.value?.transcription_provider_counts ?? {}
  const entries = Object.entries(counts).sort((left, right) => right[1] - left[1])
  if (!entries.length) return '暂无转写来源'
  return entries.map(([provider, count]) => `${provider} ${count}`).join('、')
})
const analyticsWindowLabel = computed(() => const analyticsWindowLabel = computed(() =>
formatAnalyticsWindowLabel(voiceAnalytics.value?.window_days ?? null), formatAnalyticsWindowLabel(voiceAnalytics.value?.window_days ?? null),
) )
@@ -616,10 +682,19 @@ async function syncVoiceStudioRouteState(options?: {
} }
function buildVoiceAnalyticsPath() { function buildVoiceAnalyticsPath() {
if (analyticsWindow.value === 'all') { const params = new URLSearchParams()
return '/api/voice-sessions/analytics' if (analyticsWindow.value !== 'all') {
params.set('days', analyticsWindow.value)
} }
return `/api/voice-sessions/analytics?days=${analyticsWindow.value}` if (analyticsProviderFilter.value) {
params.set('provider', analyticsProviderFilter.value)
}
if (analyticsStatusFilter.value) {
params.set('session_status', analyticsStatusFilter.value)
}
const query = params.toString()
const path = '/api/voice-sessions/analytics'
return query ? `${path}?${query}` : path
} }
function buildVoiceSessionListPath() { function buildVoiceSessionListPath() {
@@ -1157,6 +1232,14 @@ function setAnalyticsWindow(value: '7' | '30' | 'all') {
analyticsWindow.value = value analyticsWindow.value = value
} }
// Store the selected transcription provider filter as a string.
function setAnalyticsProviderFilter(value: string | number) {
  const nextProvider = String(value)
  analyticsProviderFilter.value = nextProvider
}
// Store the selected session status filter as a string.
function setAnalyticsStatusFilter(value: string | number) {
  const nextStatus = String(value)
  analyticsStatusFilter.value = nextStatus
}
function setSessionFilter(value: SessionFilter) { function setSessionFilter(value: SessionFilter) {
suppressAutoAdvanceNotice.value = true suppressAutoAdvanceNotice.value = true
clearAttentionCompletionNotice() clearAttentionCompletionNotice()
@@ -1258,6 +1341,10 @@ watch(analyticsWindow, () => {
void loadVoiceAnalytics() void loadVoiceAnalytics()
}) })
// Reload voice analytics whenever the provider or status filter changes.
// `void` marks the result of loadVoiceAnalytics() as intentionally unused.
watch([analyticsProviderFilter, analyticsStatusFilter], () => {
void loadVoiceAnalytics()
})
watch(sessionFilter, () => { watch(sessionFilter, () => {
void loadSessions() void loadSessions()
}) })
@@ -1528,6 +1615,9 @@ onBeforeUnmount(() => {
<div class="mt-1 text-xs text-gray-500"> <div class="mt-1 text-xs text-gray-500">
{{ formatSessionStatus(session.status) }} · {{ session.total_turns }} {{ formatSessionStatus(session.status) }} · {{ session.total_turns }}
</div> </div>
<div class="mt-1 text-xs text-gray-400">
{{ getSessionInputModeSummary(session) }}
</div>
<div <div
class="mt-3 rounded-xl border px-3 py-2 text-xs leading-5" class="mt-3 rounded-xl border px-3 py-2 text-xs leading-5"
:class="getVoiceSessionNextStep(session).toneClass" :class="getVoiceSessionNextStep(session).toneClass"
@@ -1669,6 +1759,22 @@ onBeforeUnmount(() => {
全部 全部
</button> </button>
</div> </div>
<div class="mt-3 grid grid-cols-1 gap-3 sm:grid-cols-2">
<BaseSelect
v-model="analyticsProviderFilter"
label="转写来源筛选"
:options="analyticsProviderOptions"
placeholder="全部来源"
@update:modelValue="setAnalyticsProviderFilter"
/>
<BaseSelect
v-model="analyticsStatusFilter"
label="会话状态筛选"
:options="analyticsStatusOptions"
placeholder="全部状态"
@update:modelValue="setAnalyticsStatusFilter"
/>
</div>
<div class="mt-4 grid grid-cols-2 gap-3 xl:grid-cols-4"> <div class="mt-4 grid grid-cols-2 gap-3 xl:grid-cols-4">
<div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3"> <div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
<div class="text-xs text-gray-500">Turn 成功率</div> <div class="text-xs text-gray-500">Turn 成功率</div>
@@ -1677,6 +1783,7 @@ onBeforeUnmount(() => {
<div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3"> <div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
<div class="text-xs text-gray-500">低置信度触发</div> <div class="text-xs text-gray-500">低置信度触发</div>
<div class="mt-1 text-lg font-semibold text-amber-700">{{ voiceAnalytics.low_confidence_turns }}</div> <div class="mt-1 text-lg font-semibold text-amber-700">{{ voiceAnalytics.low_confidence_turns }}</div>
<div class="mt-1 text-xs text-gray-400">确认率 {{ confirmationRequestRateLabel }}</div>
</div> </div>
<div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3"> <div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
<div class="text-xs text-gray-500">安全介入</div> <div class="text-xs text-gray-500">安全介入</div>
@@ -1686,12 +1793,36 @@ onBeforeUnmount(() => {
<div class="text-xs text-gray-500">Finalize 转化率</div> <div class="text-xs text-gray-500">Finalize 转化率</div>
<div class="mt-1 text-lg font-semibold text-emerald-700">{{ finalizeConversionRateLabel }}</div> <div class="mt-1 text-lg font-semibold text-emerald-700">{{ finalizeConversionRateLabel }}</div>
</div> </div>
<div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
<div class="text-xs text-gray-500">上传语音占比</div>
<div class="mt-1 text-lg font-semibold text-sky-700">{{ userAudioTurnRateLabel }}</div>
<div class="mt-1 text-xs text-gray-400">上传 {{ voiceAnalytics.uploaded_audio_turns }} / 文本 {{ voiceAnalytics.text_fallback_turns }}</div>
</div>
<div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
<div class="text-xs text-gray-500">助手语音覆盖</div>
<div class="mt-1 text-lg font-semibold text-purple-700">{{ assistantAudioReadyRateLabel }}</div>
<div class="mt-1 text-xs text-gray-400">{{ voiceAnalytics.assistant_audio_ready_turns }} 轮有语音</div>
</div>
<div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
<div class="text-xs text-gray-500">ASR 成功率</div>
<div class="mt-1 text-lg font-semibold text-indigo-700">{{ asrSuccessRateLabel }}</div>
</div>
<div class="rounded-xl border border-gray-100 bg-gray-50 px-4 py-3">
<div class="text-xs text-gray-500">TTS 成功率</div>
<div class="mt-1 text-lg font-semibold text-fuchsia-700">{{ ttsSuccessRateLabel }}</div>
</div>
</div> </div>
<p class="mt-4 text-sm text-gray-500"> <p class="mt-4 text-sm text-gray-500">
ASR 失败 {{ voiceAnalytics.asr_failures }} TTS 失败 {{ voiceAnalytics.tts_failures }} ASR 失败 {{ voiceAnalytics.asr_failures }} TTS 失败 {{ voiceAnalytics.tts_failures }}
当前共有 {{ voiceAnalytics.total_sessions }} 个会话其中 {{ voiceAnalytics.attention_sessions }} 个仍需处理 当前共有 {{ voiceAnalytics.total_sessions }} 个会话其中 {{ voiceAnalytics.attention_sessions }} 个仍需处理
已完成 {{ voiceAnalytics.finalized_sessions }} 已完成 {{ voiceAnalytics.finalized_sessions }}
</p> </p>
<p class="mt-2 text-sm text-gray-500">
平均用户语音 {{ avgUserAudioDurationLabel }}平均助手语音 {{ avgAssistantAudioDurationLabel }}转写来源{{ transcriptionProviderSummary }}
</p>
<p class="mt-2 text-sm text-gray-500">
平均置信度{{ avgConfidenceSummary }}安全介入率 {{ Math.round(voiceAnalytics.safety_intervention_rate * 100) }}%
</p>
<p <p
v-if="voiceAnalytics.attention_sessions" v-if="voiceAnalytics.attention_sessions"
class="mt-2 text-sm text-gray-500" class="mt-2 text-sm text-gray-500"
@@ -2199,6 +2330,8 @@ onBeforeUnmount(() => {
<span>·</span> <span>·</span>
<span>{{ formatIntent(turn.detected_intent) }}</span> <span>{{ formatIntent(turn.detected_intent) }}</span>
<span v-if="turn.transcription_provider">· {{ turn.transcription_provider }}</span> <span v-if="turn.transcription_provider">· {{ turn.transcription_provider }}</span>
<span v-if="turn.user_audio_duration_ms">· 用户语音 {{ formatDurationMs(turn.user_audio_duration_ms) }}</span>
<span v-if="turn.assistant_audio_duration_ms">· 助手语音 {{ formatDurationMs(turn.assistant_audio_duration_ms) }}</span>
</div> </div>
<div class="mt-3 text-sm text-gray-800"> <div class="mt-3 text-sm text-gray-800">
<span class="font-medium text-gray-900">孩子</span> <span class="font-medium text-gray-900">孩子</span>

View File

@@ -9,8 +9,9 @@ SMOKE_AUDIO="${SMOKE_AUDIO:-0}"
SMOKE_VOICE="${SMOKE_VOICE:-0}" SMOKE_VOICE="${SMOKE_VOICE:-0}"
COOKIE_JAR="$(mktemp "${TMPDIR:-/tmp}/dreamweaver-cookie.XXXXXX")" COOKIE_JAR="$(mktemp "${TMPDIR:-/tmp}/dreamweaver-cookie.XXXXXX")"
VOICE_SMOKE_AUDIO="$(mktemp "${TMPDIR:-/tmp}/dreamweaver-voice-audio.XXXXXX")"
cleanup() { cleanup() {
rm -f "$COOKIE_JAR" rm -f "$COOKIE_JAR" "$VOICE_SMOKE_AUDIO"
} }
trap cleanup EXIT trap cleanup EXIT
@@ -174,12 +175,13 @@ if [[ "$SMOKE_VOICE" == "1" ]]; then
voice_turn_detail_json="$(get_json "$APP_URL/api/voice-sessions/$voice_session_id/turns/$voice_turn_id")" voice_turn_detail_json="$(get_json "$APP_URL/api/voice-sessions/$voice_session_id/turns/$voice_turn_id")"
assert_jq "$voice_turn_detail_json" '.user_transcript | contains("小熊")' "voice fallback turn should keep user transcript" assert_jq "$voice_turn_detail_json" '.user_transcript | contains("小熊")' "voice fallback turn should keep user transcript"
assert_jq "$voice_turn_detail_json" '.assistant_text != null and .assistant_text != ""' "voice fallback turn should return assistant text" assert_jq "$voice_turn_detail_json" '.assistant_text != null and .assistant_text != ""' "voice fallback turn should return assistant text"
assert_jq "$voice_turn_detail_json" '.detected_intent == "start" and .requires_confirmation == false' "first voice turn should start the story without confirmation" assert_jq "$voice_turn_detail_json" '.detected_intent == "start_story" and .requires_confirmation == false' "first voice turn should start the story without confirmation"
echo "$voice_turn_detail_json" | jq '{id,status,detected_intent,requires_confirmation,assistant_audio_ready,assistant_text}' echo "$voice_turn_detail_json" | jq '{id,status,detected_intent,requires_confirmation,assistant_audio_ready,assistant_text}'
say "Submitting voice uploaded turn with demo transcript hint" say "Submitting voice uploaded turn with demo transcript hint"
printf 'dreamweaver-demo-audio' > "$VOICE_SMOKE_AUDIO"
voice_upload_json="$(post_form "$APP_URL/api/voice-sessions/$voice_session_id/turns" \ voice_upload_json="$(post_form "$APP_URL/api/voice-sessions/$voice_session_id/turns" \
-F 'audio_file=@/dev/null;filename=turn.webm;type=audio/webm' \ -F "audio_file=@${VOICE_SMOKE_AUDIO};filename=turn.webm;type=audio/webm" \
-F 'duration_ms=900' \ -F 'duration_ms=900' \
-F 'transcript_hint=不要让小熊害怕,让月亮姐姐帮它')" -F 'transcript_hint=不要让小熊害怕,让月亮姐姐帮它')"
voice_upload_turn_id="$(jq -r '.turn_id' <<<"$voice_upload_json")" voice_upload_turn_id="$(jq -r '.turn_id' <<<"$voice_upload_json")"
@@ -187,18 +189,27 @@ if [[ "$SMOKE_VOICE" == "1" ]]; then
voice_upload_detail_json="$(get_json "$APP_URL/api/voice-sessions/$voice_session_id/turns/$voice_upload_turn_id")" voice_upload_detail_json="$(get_json "$APP_URL/api/voice-sessions/$voice_session_id/turns/$voice_upload_turn_id")"
assert_jq "$voice_upload_detail_json" '.user_transcript | contains("月亮姐姐")' "voice upload turn should expose hinted transcript" assert_jq "$voice_upload_detail_json" '.user_transcript | contains("月亮姐姐")' "voice upload turn should expose hinted transcript"
assert_jq "$voice_upload_detail_json" '.detected_intent == "correct" and .assistant_text != null' "voice upload correction should continue the narrative" assert_jq "$voice_upload_detail_json" '.detected_intent == "correct_story" and .assistant_text != null' "voice upload correction should continue the narrative"
echo "$voice_upload_detail_json" | jq '{id,status,transcription_provider,detected_intent,requires_confirmation,assistant_audio_ready,assistant_text}' assert_jq "$voice_upload_detail_json" '.user_audio_duration_ms == 900' "voice upload turn should expose user audio duration"
echo "$voice_upload_detail_json" | jq '{id,status,transcription_provider,user_audio_duration_ms,detected_intent,requires_confirmation,assistant_audio_ready,assistant_text}'
say "Checking voice session detail and analytics" say "Checking voice session detail and analytics"
voice_detail_json="$(get_json "$APP_URL/api/voice-sessions/$voice_session_id")" voice_detail_json="$(get_json "$APP_URL/api/voice-sessions/$voice_session_id")"
assert_jq "$voice_detail_json" '.current_turn_index >= 2 and (.recent_turns | length) >= 2 and (.events | length) >= 2 and .can_finalize == true' "voice session should include turns/events and be finalizable" assert_jq "$voice_detail_json" '.current_turn_index >= 2 and (.recent_turns | length) >= 2 and (.events | length) >= 2 and .can_finalize == true' "voice session should include turns/events and be finalizable"
assert_jq "$voice_detail_json" '([.events[].event_type] | index("turn_transcribed")) != null and ([.events[].event_type] | index("turn_narrative_ready")) != null' "voice session should record key turn events" assert_jq "$voice_detail_json" '([.events[].event_type] | index("turn_transcribed")) != null and ([.events[].event_type] | index("assistant_text_ready")) != null' "voice session should record key turn events"
echo "$voice_detail_json" | jq '{id,status,current_turn_index,can_finalize,latest_detected_intent,events:([.events[].event_type] | unique)}' echo "$voice_detail_json" | jq '{id,status,current_turn_index,can_finalize,latest_detected_intent,events:([.events[].event_type] | unique)}'
voice_analytics_json="$(get_json "$APP_URL/api/voice-sessions/analytics?days=7")" voice_analytics_json="$(get_json "$APP_URL/api/voice-sessions/analytics?days=7")"
assert_jq "$voice_analytics_json" '.window_days == 7 and .total_sessions >= 1 and .total_turns >= 2 and .successful_turns >= 2' "voice analytics should include the smoke session" assert_jq "$voice_analytics_json" '.window_days == 7 and .total_sessions >= 1 and .total_turns >= 2 and .successful_turns >= 2' "voice analytics should include the smoke session"
echo "$voice_analytics_json" | jq '{window_days,total_sessions,total_turns,successful_turns,failed_turns,turn_success_rate,finalize_conversion_rate}' assert_jq "$voice_analytics_json" '.total_user_audio_duration_ms >= 2100 and .avg_user_audio_duration_ms > 0 and .transcription_provider_counts.demo >= 1 and .transcription_provider_counts.fallback >= 1' "voice analytics should expose duration and provider distribution"
assert_jq "$voice_analytics_json" '.text_fallback_turns >= 1 and .uploaded_audio_turns >= 1 and .user_audio_turn_rate > 0 and .assistant_audio_ready_rate > 0 and .asr_success_rate > 0 and .tts_success_rate > 0' "voice analytics should expose turn mix and success rates"
echo "$voice_analytics_json" | jq '{window_days,total_sessions,total_turns,successful_turns,failed_turns,text_fallback_turns,uploaded_audio_turns,user_audio_turn_rate,assistant_audio_ready_rate,asr_success_rate,tts_success_rate,total_user_audio_duration_ms,avg_user_audio_duration_ms,transcription_provider_counts,confirmation_request_rate,turn_success_rate,finalize_conversion_rate}'
voice_demo_analytics_json="$(get_json "$APP_URL/api/voice-sessions/analytics?days=7&provider=demo")"
assert_jq "$voice_demo_analytics_json" '.provider == "demo" and .uploaded_audio_turns >= 1 and (.transcription_provider_counts | keys == ["demo"])' "voice analytics should filter by ASR provider"
voice_waiting_analytics_json="$(get_json "$APP_URL/api/voice-sessions/analytics?days=7&session_status=waiting_user")"
assert_jq "$voice_waiting_analytics_json" '.session_status == "waiting_user" and .total_sessions >= 1' "voice analytics should filter by session status"
say "Finalizing voice session into story" say "Finalizing voice session into story"
voice_finalize_json="$(post_json "$APP_URL/api/voice-sessions/$voice_session_id/finalize" '{ voice_finalize_json="$(post_json "$APP_URL/api/voice-sessions/$voice_session_id/finalize" '{