Add voice analytics filters and metrics

This commit is contained in:
2026-04-26 22:00:34 +08:00
parent 3805c18622
commit 55ca0985eb
25 changed files with 710 additions and 39 deletions

View File

@@ -335,6 +335,7 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
user_transcript=turn.user_transcript,
transcript_confidence=turn.transcript_confidence,
transcription_provider=turn_patch.get("transcription_provider"),
user_audio_duration_ms=turn.user_audio_duration_ms,
detected_intent=turn.detected_intent,
intent_confidence=turn.intent_confidence,
understanding_summary=confirmation_state["understanding_summary"],
@@ -346,6 +347,7 @@ def _turn_to_summary(turn: VoiceTurn) -> VoiceTurnSummaryResponse:
safety_blocked=safety_state["safety_blocked"],
safety_message=safety_state["safety_message"],
assistant_text=turn.assistant_text,
assistant_audio_duration_ms=turn.assistant_audio_duration_ms,
assistant_audio_ready=session_audio_exists(turn.assistant_audio_path),
assistant_audio_url=_assistant_audio_url(
turn.session_id,
@@ -1194,10 +1196,14 @@ async def get_voice_session_analytics_service(
db: AsyncSession,
*,
days: int | None = 30,
provider: str | None = None,
session_status: str | None = None,
) -> VoiceSessionAnalyticsResponse:
cutoff = None
if days is not None:
cutoff = datetime.now(timezone.utc) - timedelta(days=days)
provider_filter = (provider or "").strip() or None
session_status_filter = (session_status or "").strip() or None
session_query = select(VoiceSession).where(VoiceSession.user_id == user_id)
turn_query = (
@@ -1215,10 +1221,30 @@ async def get_voice_session_analytics_service(
session_query = session_query.where(VoiceSession.created_at >= cutoff)
turn_query = turn_query.where(VoiceTurn.created_at >= cutoff)
event_query = event_query.where(VoiceSessionEvent.created_at >= cutoff)
if session_status_filter is not None:
session_query = session_query.where(VoiceSession.status == session_status_filter)
turn_query = turn_query.where(VoiceSession.status == session_status_filter)
event_query = event_query.where(VoiceSession.status == session_status_filter)
sessions = (await db.execute(session_query)).scalars().all()
turns = (await db.execute(turn_query)).scalars().all()
events = (await db.execute(event_query)).scalars().all()
if provider_filter is not None:
provider_turn_ids = {
turn.id
for turn in turns
if ((turn.story_patch or {}).get("transcription_provider") or "unknown")
== provider_filter
}
provider_session_ids = {turn.session_id for turn in turns if turn.id in provider_turn_ids}
sessions = [session for session in sessions if session.id in provider_session_ids]
turns = [turn for turn in turns if turn.id in provider_turn_ids]
events = [
event
for event in events
if event.turn_id in provider_turn_ids
or (event.turn_id is None and event.session_id in provider_session_ids)
]
session_summaries = [await _build_session_summary(db, session) for session in sessions]
total_sessions = len(sessions)
@@ -1258,6 +1284,36 @@ async def get_voice_session_analytics_service(
safety_interventions = sum(
1 for event in events if event.event_type == "safety_intervention_requested"
)
text_fallback_turns = sum(
1 for turn in turns if (turn.story_patch or {}).get("transcription_provider") == "fallback"
)
uploaded_audio_turns = sum(1 for turn in turns if turn.user_audio_path)
assistant_audio_ready_turns = sum(
1 for turn in turns if session_audio_exists(turn.assistant_audio_path)
)
user_audio_durations = [
duration for turn in turns if (duration := turn.user_audio_duration_ms) is not None
]
assistant_audio_durations = [
duration for turn in turns if (duration := turn.assistant_audio_duration_ms) is not None
]
total_user_audio_duration_ms = sum(user_audio_durations)
total_assistant_audio_duration_ms = sum(assistant_audio_durations)
transcription_provider_counts: dict[str, int] = {}
for turn in turns:
provider = (turn.story_patch or {}).get("transcription_provider") or "unknown"
transcription_provider_counts[provider] = transcription_provider_counts.get(provider, 0) + 1
failure_event_counts: dict[str, int] = {}
for event in events:
if event.status != "failed":
continue
failure_event_counts[event.event_type] = failure_event_counts.get(event.event_type, 0) + 1
transcript_confidences = [
confidence for turn in turns if (confidence := turn.transcript_confidence) is not None
]
intent_confidences = [
confidence for turn in turns if (confidence := turn.intent_confidence) is not None
]
turn_success_rate = (
round(successful_turns / total_turns, 4) if total_turns else 0.0
@@ -1265,9 +1321,27 @@ async def get_voice_session_analytics_service(
finalize_conversion_rate = (
round(finalized_sessions / total_sessions, 4) if total_sessions else 0.0
)
confirmation_request_rate = (
round(low_confidence_turns / total_turns, 4) if total_turns else 0.0
)
user_audio_turn_rate = round(uploaded_audio_turns / total_turns, 4) if total_turns else 0.0
assistant_audio_ready_rate = (
round(assistant_audio_ready_turns / successful_turns, 4) if successful_turns else 0.0
)
asr_attempts = uploaded_audio_turns + asr_failures
asr_success_rate = round(uploaded_audio_turns / asr_attempts, 4) if asr_attempts else 0.0
tts_attempts = assistant_audio_ready_turns + tts_failures
tts_success_rate = (
round(assistant_audio_ready_turns / tts_attempts, 4) if tts_attempts else 0.0
)
safety_intervention_rate = (
round(safety_interventions / total_turns, 4) if total_turns else 0.0
)
return VoiceSessionAnalyticsResponse(
window_days=days,
provider=provider_filter,
session_status=session_status_filter,
total_sessions=total_sessions,
attention_sessions=attention_sessions,
confirmation_attention_sessions=confirmation_attention_sessions,
@@ -1283,6 +1357,40 @@ async def get_voice_session_analytics_service(
tts_failures=tts_failures,
low_confidence_turns=low_confidence_turns,
safety_interventions=safety_interventions,
text_fallback_turns=text_fallback_turns,
uploaded_audio_turns=uploaded_audio_turns,
user_audio_turn_rate=user_audio_turn_rate,
assistant_audio_ready_turns=assistant_audio_ready_turns,
assistant_audio_ready_rate=assistant_audio_ready_rate,
asr_success_rate=asr_success_rate,
tts_success_rate=tts_success_rate,
avg_transcript_confidence=(
round(sum(transcript_confidences) / len(transcript_confidences), 4)
if transcript_confidences
else 0.0
),
avg_intent_confidence=(
round(sum(intent_confidences) / len(intent_confidences), 4)
if intent_confidences
else 0.0
),
safety_intervention_rate=safety_intervention_rate,
failure_event_counts=failure_event_counts,
total_user_audio_duration_ms=total_user_audio_duration_ms,
avg_user_audio_duration_ms=(
round(total_user_audio_duration_ms / len(user_audio_durations), 2)
if user_audio_durations
else 0.0
),
total_assistant_audio_turns=len(assistant_audio_durations),
total_assistant_audio_duration_ms=total_assistant_audio_duration_ms,
avg_assistant_audio_duration_ms=(
round(total_assistant_audio_duration_ms / len(assistant_audio_durations), 2)
if assistant_audio_durations
else 0.0
),
transcription_provider_counts=transcription_provider_counts,
confirmation_request_rate=confirmation_request_rate,
turn_success_rate=turn_success_rate,
finalize_conversion_rate=finalize_conversion_rate,
)