Expand generation harness observability

This commit is contained in:
2026-06-24 10:48:23 +08:00
parent 459ca9edef
commit 1f34d80083
35 changed files with 8003 additions and 112 deletions

View File

@@ -1,4 +1,5 @@
from typing import Literal
from datetime import datetime
from typing import Any, Literal
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, ConfigDict, Field
@@ -9,6 +10,10 @@ from app.core.admin_auth import admin_guard
from app.db.admin_models import Provider
from app.db.database import get_db
from app.services.adapters.registry import AdapterRegistry
from app.services.admin_evaluation_analytics import get_admin_evaluation_analytics
from app.services.admin_executor_coverage import get_admin_executor_coverage
from app.services.admin_generation_trace import get_admin_generation_job_trace
from app.services.admin_harness_readiness import get_admin_harness_readiness
from app.services.admin_provider_analytics import get_admin_provider_analytics
from app.services.cost_tracker import cost_tracker
from app.services.provider_policy import DEFAULT_PROVIDERS, list_capability_policies
@@ -103,6 +108,169 @@ class ProviderAnalyticsResponse(BaseModel):
by_user: list[ProviderAnalyticsUserBucket]
failure_reasons: list[ProviderAnalyticsFailureReason]
class EvaluationAnalyticsArtifactBucket(BaseModel):
    """One entry of ``EvaluationAnalyticsResponse.by_artifact``."""

    # Artifact label this bucket groups by.
    # NOTE(review): presumably the same vocabulary as the endpoint's
    # ``artifact`` filter ("story_text" / "storybook_pages") - confirm.
    artifact: str
    # Number of records in this bucket (presumably evaluations - confirm).
    count: int
class EvaluationAnalyticsOutputModeBucket(BaseModel):
    """One entry of ``EvaluationAnalyticsResponse.by_output_mode``."""

    # Output mode label this bucket groups by; allowed values are not
    # visible in this module.
    output_mode: str
    # Number of records in this bucket (presumably evaluations - confirm).
    count: int
class EvaluationAnalyticsScoreBandBucket(BaseModel):
    """One entry of ``EvaluationAnalyticsResponse.score_bands``."""

    # Label of the score band.
    # NOTE(review): band boundaries are defined by the analytics service,
    # not here - confirm before documenting them for clients.
    band: str
    # Number of records that fell into this band.
    count: int
class EvaluationAnalyticsDimensionScore(BaseModel):
    """One entry of ``EvaluationAnalyticsResponse.dimension_scores``."""

    # Name of the evaluation dimension being summarized.
    dimension: str
    # Mean score for this dimension; the score scale is not visible here.
    average_score: float
    # Number of samples behind ``average_score`` (presumably - confirm).
    count: int
class EvaluationAnalyticsQualityGateIssue(BaseModel):
    """One entry of ``EvaluationAnalyticsResponse.quality_gate_issues``."""

    # Identifier of the quality-gate issue.
    code: str
    # Number of times this issue code was recorded (presumably - confirm).
    count: int
class EvaluationAnalyticsFailureCategory(BaseModel):
    """One entry of ``EvaluationAnalyticsResponse.failure_categories``."""

    # Failure category label; the category vocabulary is not visible here.
    category: str
    # Number of records attributed to this category.
    count: int
class EvaluationAnalyticsWarning(BaseModel):
    """One entry of ``EvaluationAnalyticsResponse.warnings``."""

    # Warning text.
    message: str
    # Number of times this warning was recorded (presumably - confirm).
    count: int
class EvaluationAnalyticsResponse(BaseModel):
    """Response body of ``GET /evaluations/analytics``.

    Also embedded as ``HarnessReadinessResponse.evaluation_analytics``.
    """

    # Scope label set by the service; its possible values are not visible
    # in this module.
    scope: str
    # NOTE(review): presumably echoes the ``days`` query parameter
    # (``None`` when no window was requested) - confirm against the service.
    window_days: int | None = None
    # NOTE(review): presumably echoes the ``artifact`` filter - confirm.
    artifact: str | None = None

    # Headline totals.
    total_evaluations: int
    passed_evaluations: int
    blocked_evaluations: int
    # NOTE(review): unit (0-1 fraction vs. percentage) is not visible here.
    pass_rate: float
    # Optional; defaults to ``None`` when the service supplies no value.
    average_score: float | None = None

    # NOTE(review): presumably counts of distinct jobs / stories / users
    # covered by the evaluations - confirm.
    job_count: int
    story_count: int
    user_count: int

    # Breakdowns; all are required lists (no default).
    by_artifact: list[EvaluationAnalyticsArtifactBucket]
    by_output_mode: list[EvaluationAnalyticsOutputModeBucket]
    score_bands: list[EvaluationAnalyticsScoreBandBucket]
    dimension_scores: list[EvaluationAnalyticsDimensionScore]
    quality_gate_issues: list[EvaluationAnalyticsQualityGateIssue]
    failure_categories: list[EvaluationAnalyticsFailureCategory]
    warnings: list[EvaluationAnalyticsWarning]
class ExecutorCoveragePlanModeBucket(BaseModel):
    """One entry of ``ExecutorCoverageResponse.by_plan_mode``."""

    # Plan mode label this bucket groups by.
    # NOTE(review): presumably the same vocabulary as the endpoint's
    # ``plan_mode`` filter ("asset_generation" / "asset_retry") - confirm.
    plan_mode: str
    # Number of records in this bucket (presumably executor runs - confirm).
    count: int
class ExecutorCoverageOutputModeBucket(BaseModel):
    """One entry of ``ExecutorCoverageResponse.by_output_mode``."""

    # Output mode label this bucket groups by; allowed values are not
    # visible in this module.
    output_mode: str
    # Number of records in this bucket (presumably executor runs - confirm).
    count: int
class ExecutorCoverageTaskKeyBucket(BaseModel):
    """Per-task-key count.

    Used by both ``ExecutorCoverageResponse.executed_task_keys`` and
    ``ExecutorCoverageResponse.ignored_task_keys``.
    """

    # Key identifying the planned task.
    task_key: str
    # Number of occurrences of this key in the list it belongs to.
    count: int
class ExecutorCoverageAssetBucket(BaseModel):
    """One entry of ``ExecutorCoverageResponse.result_assets``."""

    # Asset label; the asset vocabulary is not visible in this module.
    asset: str
    # Number of records for this asset.
    count: int
class ExecutorCoverageResponse(BaseModel):
    """Response body of ``GET /executors/coverage``.

    Also embedded as ``executor_coverage`` in
    ``AdminGenerationJobTraceResponse`` and ``HarnessReadinessResponse``.
    """

    # Scope label set by the service; its possible values are not visible
    # in this module.
    scope: str
    # NOTE(review): presumably echoes the ``days`` query parameter - confirm.
    window_days: int | None = None
    # NOTE(review): presumably echoes the ``plan_mode`` filter - confirm.
    plan_mode: str | None = None

    # Headline totals.
    total_runs: int
    total_planned_tasks: int
    total_executed_tasks: int
    total_ignored_tasks: int
    # NOTE(review): presumably executed / planned tasks; the exact formula
    # and unit live in the coverage service - confirm.
    coverage_ratio: float

    # NOTE(review): presumably counts of distinct jobs / stories / users
    # covered by the runs - confirm.
    job_count: int
    story_count: int
    user_count: int

    # Breakdowns; all are required lists (no default).
    by_plan_mode: list[ExecutorCoveragePlanModeBucket]
    by_output_mode: list[ExecutorCoverageOutputModeBucket]
    executed_task_keys: list[ExecutorCoverageTaskKeyBucket]
    ignored_task_keys: list[ExecutorCoverageTaskKeyBucket]
    result_assets: list[ExecutorCoverageAssetBucket]
class AdminGenerationJobEventResponse(BaseModel):
    """One event in ``AdminGenerationJobTraceResponse.events``."""

    # Integer identifier of the event.
    id: int
    # Identifier of the generation job the event belongs to.
    job_id: str
    # Optional story identifier; ``None`` when not supplied.
    story_id: int | None = None
    # Event type and status labels; vocabularies are not visible here.
    event_type: str
    status: str
    # Optional human-readable message.
    message: str | None = None
    # Free-form metadata; defaults to a fresh empty dict per instance.
    event_metadata: dict[str, Any] = Field(default_factory=dict)
    # NOTE(review): timezone awareness is decided by the producer - confirm.
    created_at: datetime
class AdminGenerationJobTraceResponse(BaseModel):
    """Response body of ``GET /generations/jobs/{job_id}/trace``."""

    # Job identity and ownership.
    id: str
    user_id: str
    # Optional story identifier; ``None`` when not supplied.
    story_id: int | None = None

    # Request shape; allowed values are not visible in this module.
    output_mode: str
    input_type: str

    # Progress state.
    status: str
    current_step: str
    # NOTE(review): presumably 0-100, but no range constraint is declared.
    progress_percent: int
    progress_label: str
    is_terminal: bool
    # Both action flags default to False when the service omits them.
    can_cancel: bool = False
    can_retry: bool = False

    # Free-form payloads; each defaults to a fresh empty dict.
    result_snapshot: dict[str, Any] = Field(default_factory=dict)
    error_message: str | None = None
    request_payload: dict[str, Any] = Field(default_factory=dict)

    # Required executor coverage summary.
    # NOTE(review): presumably scoped to this job - confirm.
    executor_coverage: ExecutorCoverageResponse
    # Job events; defaults to an empty list. Ordering is decided by the
    # trace service, not by this model.
    events: list[AdminGenerationJobEventResponse] = Field(default_factory=list)

    created_at: datetime
    updated_at: datetime
class HarnessReadinessCheck(BaseModel):
    """One entry of ``HarnessReadinessResponse.checks``."""

    # Identifier of the check.
    code: str
    # Outcome of the check; validation rejects any other value.
    status: Literal["ready", "needs_attention", "blocked"]
    # Human-readable explanation of the outcome.
    message: str
    # Free-form supporting data; defaults to a fresh empty dict.
    details: dict[str, Any] = Field(default_factory=dict)
class HarnessReadinessGoldenReplay(BaseModel):
    """Payload of ``HarnessReadinessResponse.golden_replay``."""

    # Overall replay outcome.
    passed: bool
    # Number of replay cases.
    total_cases: int
    # Identifiers of failed cases; required (no default).
    # NOTE(review): presumably empty when ``passed`` is True - confirm.
    failed_case_ids: list[str]
    # Two-level mapping of string keys to integer counts; defaults to an
    # empty dict. The meaning of the keys is defined by the producer.
    coverage_summary: dict[str, dict[str, int]] = Field(default_factory=dict)
class HarnessReadinessThresholds(BaseModel):
    """Payload of ``HarnessReadinessResponse.thresholds``.

    NOTE(review): the field names suggest minimums the readiness checks
    compare against; the comparison logic is in the readiness service and
    not visible here - confirm.
    """

    # Minimum sample sizes.
    min_runtime_evaluations: int
    min_executor_runs: int
    # Minimum quality levels; units/scales follow the analytics fields
    # they are compared with (not visible here).
    min_evaluation_pass_rate: float
    min_evaluation_average_score: float
    min_executor_coverage_ratio: float
class HarnessReadinessResponse(BaseModel):
    """Response body of ``GET /harness/readiness``."""

    # Scope label set by the service; its possible values are not visible
    # in this module.
    scope: str
    # NOTE(review): presumably echoes the ``days`` query parameter - confirm.
    window_days: int | None = None
    # Overall status; validation rejects any other value.
    # NOTE(review): how it is derived from ``checks`` is decided by the
    # readiness service - confirm.
    status: Literal["ready", "needs_attention", "blocked"]
    # Thresholds reported alongside the result.
    thresholds: HarnessReadinessThresholds
    # Individual check results.
    checks: list[HarnessReadinessCheck]
    # Golden replay outcome.
    golden_replay: HarnessReadinessGoldenReplay
    # Embedded analytics payloads (same models as the standalone endpoints).
    evaluation_analytics: EvaluationAnalyticsResponse
    executor_coverage: ExecutorCoverageResponse
@router.get("/providers/adapters")
async def list_available_adapters():
"""获取所有可用的适配器类型 (定义的类)。"""
@@ -137,6 +305,55 @@ async def get_provider_analytics(
)
@router.get("/evaluations/analytics", response_model=EvaluationAnalyticsResponse)
async def get_evaluation_analytics(
    days: int | None = Query(default=None, ge=1, le=365),
    artifact: Literal["story_text", "storybook_pages"] | None = Query(default=None),
    db: AsyncSession = Depends(get_db),
):
    """Return the internal content-evaluation summary (admin control plane only).

    Args:
        days: Optional day count, validated to 1-365; ``None`` is forwarded
            unchanged. Presumably a look-back window - the service decides.
        artifact: Optional artifact filter, restricted to the two literals.
        db: Async database session provided by ``get_db``.

    Returns:
        The result of ``get_admin_evaluation_analytics``, serialized through
        ``EvaluationAnalyticsResponse``.
    """
    # Both filters are forwarded as-is; all aggregation lives in the service.
    analytics = await get_admin_evaluation_analytics(db, days=days, artifact=artifact)
    return analytics
@router.get("/executors/coverage", response_model=ExecutorCoverageResponse)
async def get_executor_coverage(
    days: int | None = Query(default=None, ge=1, le=365),
    plan_mode: Literal["asset_generation", "asset_retry"] | None = Query(default=None),
    db: AsyncSession = Depends(get_db),
):
    """Return internal executor execution coverage (admin control plane only).

    Args:
        days: Optional day count, validated to 1-365; ``None`` is forwarded
            unchanged. Presumably a look-back window - the service decides.
        plan_mode: Optional plan-mode filter, restricted to the two literals.
        db: Async database session provided by ``get_db``.

    Returns:
        The result of ``get_admin_executor_coverage``, serialized through
        ``ExecutorCoverageResponse``.
    """
    # Both filters are forwarded as-is; all aggregation lives in the service.
    coverage = await get_admin_executor_coverage(db, days=days, plan_mode=plan_mode)
    return coverage
@router.get("/harness/readiness", response_model=HarnessReadinessResponse)
async def get_harness_readiness(
    days: int | None = Query(default=None, ge=1, le=365),
    db: AsyncSession = Depends(get_db),
):
    """Return the internal harness readiness review summary (admin control plane only).

    Args:
        days: Optional day count, validated to 1-365; ``None`` is forwarded
            unchanged. Presumably a look-back window - the service decides.
        db: Async database session provided by ``get_db``.

    Returns:
        The result of ``get_admin_harness_readiness``, serialized through
        ``HarnessReadinessResponse``.
    """
    readiness = await get_admin_harness_readiness(db, days=days)
    return readiness
@router.get(
    "/generations/jobs/{job_id}/trace",
    response_model=AdminGenerationJobTraceResponse,
)
async def get_generation_job_trace(
    job_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Return the full internal generation trace for one job.

    Intended for admin control-plane troubleshooting and review only.

    Args:
        job_id: Identifier of the generation job, taken from the URL path.
        db: Async database session provided by ``get_db``.

    Returns:
        The result of ``get_admin_generation_job_trace``, serialized through
        ``AdminGenerationJobTraceResponse``.
    """
    # NOTE(review): unknown-job handling (e.g. a 404) is left entirely to the
    # service; nothing in this handler checks for a missing result - confirm.
    trace = await get_admin_generation_job_trace(db, job_id=job_id)
    return trace
@router.get("/providers", response_model=list[ProviderResponse])
async def list_providers(db: AsyncSession = Depends(get_db)):
result = await db.execute(select(Provider))

View File

@@ -24,6 +24,7 @@ from app.schemas.story_schemas import (
GenerationProviderStatsResponse,
GenerationRequest,
GenerationResponse,
GenerationTraceSummaryResponse,
StoryAssetRetryRequest,
StoryAudioStatusResponse,
StorybookRequest,
@@ -37,6 +38,7 @@ from app.services import story_service
from app.services.generation_jobs import (
get_generation_job_detail,
get_story_provider_stats,
get_story_trace_summary,
get_user_generation_ops_summary,
get_user_provider_analytics,
list_story_generation_jobs,
@@ -181,6 +183,25 @@ async def get_generation_provider_stats(
)
@router.get(
    "/generations/{story_id}/trace-summary",
    response_model=GenerationTraceSummaryResponse,
)
async def get_generation_trace_summary(
    story_id: int,
    days: int | None = Query(default=None, ge=1, le=365),
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Return the workflow trace summary aggregated from generation job events.

    Args:
        story_id: Story identifier taken from the URL path.
        days: Optional day count, validated to 1-365; ``None`` is forwarded
            unchanged. Presumably a look-back window - the service decides.
        user: Authenticated user resolved by ``require_user``.
        db: Async database session provided by ``get_db``.

    Returns:
        The result of ``get_story_trace_summary``, serialized through
        ``GenerationTraceSummaryResponse``.
    """
    # The caller's id is passed along with the story id.
    # NOTE(review): ownership enforcement, if any, happens in the service.
    summary = await get_story_trace_summary(
        db, story_id=story_id, user_id=user.id, days=days
    )
    return summary
@router.get("/generations/{story_id}", response_model=StoryDetailResponse)
async def get_generation(
story_id: int,