feat: improve generation analytics and maintenance

This commit is contained in:
2026-04-19 09:03:40 +08:00
parent d5a173aa0d
commit 5318de670f
21 changed files with 1155 additions and 57 deletions

View File

@@ -0,0 +1,65 @@
"""Story audio cache maintenance tests."""
import os
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pytest
from fastapi import HTTPException
from sqlalchemy import select
from app.core.config import settings
from app.db.models import Story
from app.services.generation_jobs import create_generation_job
from app.services.story_service import generate_story_audio, prune_story_audio_cache
# Module-level marker: applies pytest.mark.asyncio to every test in this module.
pytestmark = pytest.mark.asyncio
async def test_prune_story_audio_cache_removes_expired_audio(
    db_session,
    test_story,
    mock_tts_provider,
    monkeypatch,
):
    """Pruning removes a cached audio file older than the TTL and resets the story row.

    The test generates audio for the story, back-dates the cached file's
    timestamps by ten days, sets the TTL to seven days, and then checks both
    the prune summary and the story's audio-related columns.
    """
    # Generating audio is expected to leave an mp3 in the configured cache dir.
    await generate_story_audio(test_story.id, test_story.user_id, db_session)
    audio_file = Path(settings.story_audio_cache_dir) / f"story-{test_story.id}.mp3"
    assert audio_file.is_file()

    # Back-date both atime and mtime so the file looks ten days old.
    ten_days_ago = (datetime.now(timezone.utc) - timedelta(days=10)).timestamp()
    os.utime(audio_file, (ten_days_ago, ten_days_ago))
    monkeypatch.setattr(settings, "story_audio_cache_ttl_days", 7)

    summary = await prune_story_audio_cache(db_session)

    assert summary == {"scanned": 1, "pruned": 1, "repaired": 0}
    assert not audio_file.exists()

    # Re-read the story to verify its audio fields were reset.
    story_query = select(Story).where(Story.id == test_story.id)
    refreshed = (await db_session.execute(story_query)).scalar_one()
    assert refreshed.audio_path is None
    assert refreshed.audio_status == "not_requested"
    assert refreshed.generation_status == "partial_ready"
async def test_generate_story_audio_rejects_when_story_has_active_job(
    db_session,
    test_story,
):
    """Audio generation must raise HTTP 409 when the story already has a job.

    A generation job is created for the story and left as-is; the subsequent
    call to ``generate_story_audio`` is expected to raise ``HTTPException``.
    """
    job_kwargs = {
        "user_id": test_story.user_id,
        "output_mode": "asset_retry",
        "input_type": "audio",
        "request_payload": {"story_id": test_story.id},
        "story_id": test_story.id,
    }
    await create_generation_job(db_session, **job_kwargs)

    with pytest.raises(HTTPException) as raised:
        await generate_story_audio(test_story.id, test_story.user_id, db_session)

    # Inspect the captured exception only after the context manager exits.
    error = raised.value
    assert error.status_code == 409
    assert "已有运行中的任务" in str(error.detail)

View File

@@ -1,5 +1,6 @@
"""Generation job tracking tests."""
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, patch
import pytest
@@ -12,7 +13,11 @@ from app.main import app
from app.services.adapters import AdapterConfig
from app.services.adapters.storybook.primary import Storybook, StorybookPage
from app.services.adapters.text.models import StoryOutput
from app.services.generation_jobs import create_generation_job, record_generation_event
from app.services.generation_jobs import (
create_generation_job,
mark_stale_generation_jobs,
record_generation_event,
)
pytestmark = pytest.mark.asyncio
@@ -520,6 +525,7 @@ async def test_user_provider_analytics_aggregate_across_stories(
assert data["failed_calls"] == 1
assert data["avg_latency_ms"] == 60.0
assert data["estimated_cost_usd"] == 0.013
assert data["failure_reasons"] == [{"reason": "timeout", "count": 1}]
assert data["by_provider"] == [
{
"capability": "image",
@@ -551,3 +557,249 @@ async def test_user_provider_analytics_aggregate_across_stories(
]
finally:
app.dependency_overrides.clear()
async def test_provider_analytics_support_days_and_capability_filters(
    db_session,
    auth_token,
    degraded_story_with_text,
    test_story,
):
    """Provider analytics honour the ``days`` and ``capability`` query filters.

    Two provider events are recorded: a failed image call back-dated by ten
    days and a fresh successful TTS call. The test then checks that

    * ``?days=7`` reports only the recent TTS call,
    * ``?capability=image`` reports only the failed image call, and
    * the per-story ``provider-stats`` endpoint accepts the same
      ``capability`` filter.
    """

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db
    # Everything after installing the override runs inside ``try`` so the
    # override is cleared even when the setup below fails; otherwise it would
    # leak into later tests.
    try:
        image_job = await create_generation_job(
            db_session,
            user_id=degraded_story_with_text.user_id,
            output_mode="asset_retry",
            input_type="image",
            request_payload={"assets": ["image"]},
            story_id=degraded_story_with_text.id,
        )
        old_event = await record_generation_event(
            db_session,
            job=image_job,
            story_id=degraded_story_with_text.id,
            event_type="provider_call_failed",
            status="failed",
            metadata={
                "capability": "image",
                "adapter": "cqtai",
                "strategy": "priority",
                "latency_ms": 120,
                "error": "timeout",
            },
        )
        # Back-date the failure so it is ten days old when ``days=7`` is queried.
        old_event.created_at = datetime.now(timezone.utc) - timedelta(days=10)
        await db_session.commit()

        tts_job = await create_generation_job(
            db_session,
            user_id=test_story.user_id,
            output_mode="asset_retry",
            input_type="audio",
            request_payload={"assets": ["audio"]},
            story_id=test_story.id,
        )
        await record_generation_event(
            db_session,
            job=tts_job,
            story_id=test_story.id,
            event_type="provider_call_succeeded",
            status="succeeded",
            metadata={
                "capability": "tts",
                "adapter": "edge_tts",
                "strategy": "priority",
                "latency_ms": 18,
                "estimated_cost_usd": 0.003,
            },
        )

        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            client.cookies.set("access_token", auth_token)

            # Time-window filter: only the fresh TTS call is expected.
            response = await client.get("/api/generations/provider-analytics?days=7")
            assert response.status_code == 200
            data = response.json()
            assert data["window_days"] == 7
            assert data["total_calls"] == 1
            assert data["job_count"] == 1
            assert data["story_count"] == 1
            assert data["failure_reasons"] == []

            # Capability filter: only the failed image call is expected.
            response = await client.get(
                "/api/generations/provider-analytics?capability=image"
            )
            assert response.status_code == 200
            data = response.json()
            assert data["capability"] == "image"
            assert data["total_calls"] == 1
            assert data["failed_calls"] == 1
            assert data["job_count"] == 1
            assert data["story_count"] == 1
            assert data["failure_reasons"] == [{"reason": "timeout", "count": 1}]

            # The per-story stats endpoint takes the same capability filter.
            response = await client.get(
                f"/api/generations/{degraded_story_with_text.id}/provider-stats?capability=image"
            )
            assert response.status_code == 200
            data = response.json()
            assert data["capability"] == "image"
            assert data["failure_reasons"] == [{"reason": "timeout", "count": 1}]
    finally:
        app.dependency_overrides.clear()
async def test_generation_ops_summary_exposes_running_stale_and_recent_failures(
    db_session,
    auth_token,
    degraded_story_with_text,
    test_story,
):
    """The ops summary reports active, stale, failed and degraded job counts.

    Four jobs are created and then adjusted: one is left as created with a
    recent ``updated_at``, one is left as created with ``updated_at`` three
    hours old, one is set to ``failed`` and one to ``degraded_completed``.
    The ``ops-summary`` endpoint is queried with a 48-hour window and its
    counters and ``recent_failures`` list are checked.
    """

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db
    # Setup runs inside ``try`` so a failure while creating jobs still clears
    # the dependency override instead of leaking it into later tests.
    try:
        running_job = await create_generation_job(
            db_session,
            user_id=test_story.user_id,
            output_mode="story",
            input_type="keywords",
            request_payload={"data": "星星"},
            story_id=test_story.id,
        )
        stale_job = await create_generation_job(
            db_session,
            user_id=degraded_story_with_text.user_id,
            output_mode="asset_generation",
            input_type="image",
            request_payload={"story_id": degraded_story_with_text.id},
            story_id=degraded_story_with_text.id,
        )
        failed_job = await create_generation_job(
            db_session,
            user_id=degraded_story_with_text.user_id,
            output_mode="asset_retry",
            input_type="image",
            request_payload={"assets": ["image"]},
            story_id=degraded_story_with_text.id,
        )
        degraded_job = await create_generation_job(
            db_session,
            user_id=test_story.user_id,
            output_mode="storybook",
            input_type="keywords",
            request_payload={"data": "月亮"},
            story_id=test_story.id,
        )

        # Use one reference instant so the relative ages below are exact.
        now = datetime.now(timezone.utc)

        stale_job.updated_at = now - timedelta(hours=3)

        failed_job.status = "failed"
        failed_job.current_step = "asset_retry_failed"
        failed_job.error_message = "image timeout"
        failed_job.updated_at = now - timedelta(hours=1)

        degraded_job.status = "degraded_completed"
        degraded_job.current_step = "generation_completed"
        degraded_job.updated_at = now - timedelta(minutes=30)

        running_job.updated_at = now - timedelta(minutes=10)
        await db_session.commit()

        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            client.cookies.set("access_token", auth_token)
            response = await client.get("/api/generations/ops-summary?hours=48")

        assert response.status_code == 200
        data = response.json()
        assert data["window_hours"] == 48
        assert data["active_jobs"] == 2
        assert data["stale_running_jobs"] == 1
        assert data["failed_jobs"] == 1
        assert data["degraded_jobs"] == 1
        assert data["asset_retry_jobs"] == 2

        # Only the job explicitly set to "failed" should be listed.
        assert len(data["recent_failures"]) == 1
        failure = data["recent_failures"][0]
        assert failure["job_id"] == failed_job.id
        assert failure["story_title"] == degraded_story_with_text.title
        assert failure["failure_label"] == "资源重试失败"
    finally:
        app.dependency_overrides.clear()
async def test_mark_stale_generation_jobs_marks_old_running_jobs_failed(
    db_session,
    degraded_story_with_text,
):
    """A job untouched for longer than the threshold is marked failed with an event.

    The job's ``updated_at`` is moved two hours into the past, then
    ``mark_stale_generation_jobs`` runs with a 30-minute threshold. The test
    checks the returned summary, the job's new state, and the last recorded
    event for that job.
    """
    job = await create_generation_job(
        db_session,
        user_id=degraded_story_with_text.user_id,
        output_mode="story",
        input_type="keywords",
        request_payload={"data": "超时任务"},
        story_id=degraded_story_with_text.id,
    )
    job.updated_at = datetime.now(timezone.utc) - timedelta(hours=2)
    await db_session.commit()

    outcome = await mark_stale_generation_jobs(db_session, stale_after_minutes=30)
    assert outcome == {"running": 1, "marked_stale": 1, "stale_after_minutes": 30}

    # Reload the job to observe the state written by the maintenance call.
    job_query = select(GenerationJob).where(GenerationJob.id == job.id)
    reloaded = (await db_session.execute(job_query)).scalar_one()
    assert reloaded.status == "failed"
    assert reloaded.current_step == "generation_stale_failed"
    assert reloaded.error_message == "Generation job exceeded 30 minutes without progress."

    # Events are ordered by id, so the last row is the most recently inserted.
    event_query = (
        select(GenerationJobEvent)
        .where(GenerationJobEvent.job_id == job.id)
        .order_by(GenerationJobEvent.id)
    )
    event_rows = await db_session.execute(event_query)
    last_event = event_rows.scalars().all()[-1]
    assert last_event.event_type == "generation_stale_failed"
    assert last_event.event_metadata["stale_after_minutes"] == 30
async def test_retry_assets_rejects_when_story_has_active_job(
    db_session,
    auth_token,
    degraded_story_with_text,
):
    """The retry-assets endpoint answers 409 when the story already has a job.

    A generation job is created for the story and left as-is; a subsequent
    POST to ``/api/generations/{story_id}/retry-assets`` is expected to be
    rejected with a conflict response.
    """

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db
    # Setup runs inside ``try`` so a failure while creating the job still
    # clears the dependency override instead of leaking it into later tests.
    try:
        await create_generation_job(
            db_session,
            user_id=degraded_story_with_text.user_id,
            output_mode="asset_generation",
            input_type="image",
            request_payload={"story_id": degraded_story_with_text.id},
            story_id=degraded_story_with_text.id,
        )

        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            client.cookies.set("access_token", auth_token)
            response = await client.post(
                f"/api/generations/{degraded_story_with_text.id}/retry-assets",
                json={"assets": ["image"]},
            )

        assert response.status_code == 409
        assert "已有运行中的任务" in response.json()["detail"]
    finally:
        app.dependency_overrides.clear()