Expand generation harness observability
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from typing import Literal
|
||||
from datetime import datetime
|
||||
from typing import Any, Literal
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
@@ -9,6 +10,10 @@ from app.core.admin_auth import admin_guard
|
||||
from app.db.admin_models import Provider
|
||||
from app.db.database import get_db
|
||||
from app.services.adapters.registry import AdapterRegistry
|
||||
from app.services.admin_evaluation_analytics import get_admin_evaluation_analytics
|
||||
from app.services.admin_executor_coverage import get_admin_executor_coverage
|
||||
from app.services.admin_generation_trace import get_admin_generation_job_trace
|
||||
from app.services.admin_harness_readiness import get_admin_harness_readiness
|
||||
from app.services.admin_provider_analytics import get_admin_provider_analytics
|
||||
from app.services.cost_tracker import cost_tracker
|
||||
from app.services.provider_policy import DEFAULT_PROVIDERS, list_capability_policies
|
||||
@@ -103,6 +108,169 @@ class ProviderAnalyticsResponse(BaseModel):
|
||||
by_user: list[ProviderAnalyticsUserBucket]
|
||||
failure_reasons: list[ProviderAnalyticsFailureReason]
|
||||
|
||||
|
||||
class EvaluationAnalyticsArtifactBucket(BaseModel):
    # One entry of the `by_artifact` list in EvaluationAnalyticsResponse:
    # an artifact name paired with a count.
    artifact: str
    count: int
|
||||
|
||||
|
||||
class EvaluationAnalyticsOutputModeBucket(BaseModel):
    # One entry of the `by_output_mode` list in EvaluationAnalyticsResponse:
    # an output-mode name paired with a count.
    output_mode: str
    count: int
|
||||
|
||||
|
||||
class EvaluationAnalyticsScoreBandBucket(BaseModel):
    # One entry of the `score_bands` list in EvaluationAnalyticsResponse:
    # a band label paired with a count.
    band: str
    count: int
|
||||
|
||||
|
||||
class EvaluationAnalyticsDimensionScore(BaseModel):
    # One entry of the `dimension_scores` list in EvaluationAnalyticsResponse:
    # a dimension name with its average score and a count.
    # NOTE(review): the score scale is not visible here - confirm with the
    # analytics service before documenting a range.
    dimension: str
    average_score: float
    count: int
|
||||
|
||||
|
||||
class EvaluationAnalyticsQualityGateIssue(BaseModel):
    # One entry of the `quality_gate_issues` list in
    # EvaluationAnalyticsResponse: an issue code paired with a count.
    code: str
    count: int
|
||||
|
||||
|
||||
class EvaluationAnalyticsFailureCategory(BaseModel):
    # One entry of the `failure_categories` list in
    # EvaluationAnalyticsResponse: a category name paired with a count.
    category: str
    count: int
|
||||
|
||||
|
||||
class EvaluationAnalyticsWarning(BaseModel):
    # One entry of the `warnings` list in EvaluationAnalyticsResponse:
    # a warning message paired with a count.
    message: str
    count: int
|
||||
|
||||
|
||||
class EvaluationAnalyticsResponse(BaseModel):
    # Response schema of GET /evaluations/analytics; also embedded as
    # `evaluation_analytics` in HarnessReadinessResponse.

    # Scope label plus the optional filters; both filters default to None.
    scope: str
    window_days: int | None = Field(default=None)
    artifact: str | None = Field(default=None)

    # Headline totals. `average_score` is optional and defaults to None.
    total_evaluations: int
    passed_evaluations: int
    blocked_evaluations: int
    pass_rate: float
    average_score: float | None = Field(default=None)

    # Counts of jobs, stories and users.
    job_count: int
    story_count: int
    user_count: int

    # Breakdown lists; every one is required (no default).
    by_artifact: list[EvaluationAnalyticsArtifactBucket]
    by_output_mode: list[EvaluationAnalyticsOutputModeBucket]
    score_bands: list[EvaluationAnalyticsScoreBandBucket]
    dimension_scores: list[EvaluationAnalyticsDimensionScore]
    quality_gate_issues: list[EvaluationAnalyticsQualityGateIssue]
    failure_categories: list[EvaluationAnalyticsFailureCategory]
    warnings: list[EvaluationAnalyticsWarning]
|
||||
|
||||
|
||||
class ExecutorCoveragePlanModeBucket(BaseModel):
    # One entry of the `by_plan_mode` list in ExecutorCoverageResponse:
    # a plan-mode name paired with a count.
    plan_mode: str
    count: int
|
||||
|
||||
|
||||
class ExecutorCoverageOutputModeBucket(BaseModel):
    # One entry of the `by_output_mode` list in ExecutorCoverageResponse:
    # an output-mode name paired with a count.
    output_mode: str
    count: int
|
||||
|
||||
|
||||
class ExecutorCoverageTaskKeyBucket(BaseModel):
    # Shared entry type for both `executed_task_keys` and `ignored_task_keys`
    # in ExecutorCoverageResponse: a task key paired with a count.
    task_key: str
    count: int
|
||||
|
||||
|
||||
class ExecutorCoverageAssetBucket(BaseModel):
    # One entry of the `result_assets` list in ExecutorCoverageResponse:
    # an asset name paired with a count.
    asset: str
    count: int
|
||||
|
||||
|
||||
class ExecutorCoverageResponse(BaseModel):
    # Response schema of GET /executors/coverage; also embedded as
    # `executor_coverage` in AdminGenerationJobTraceResponse and
    # HarnessReadinessResponse.

    # Scope label plus the optional filters; both filters default to None.
    scope: str
    window_days: int | None = Field(default=None)
    plan_mode: str | None = Field(default=None)

    # Run and task totals.
    # NOTE(review): how `coverage_ratio` is derived from the task totals is
    # decided by the service, not visible here - confirm before relying on it.
    total_runs: int
    total_planned_tasks: int
    total_executed_tasks: int
    total_ignored_tasks: int
    coverage_ratio: float

    # Counts of jobs, stories and users.
    job_count: int
    story_count: int
    user_count: int

    # Breakdown lists; every one is required (no default).
    by_plan_mode: list[ExecutorCoveragePlanModeBucket]
    by_output_mode: list[ExecutorCoverageOutputModeBucket]
    executed_task_keys: list[ExecutorCoverageTaskKeyBucket]
    ignored_task_keys: list[ExecutorCoverageTaskKeyBucket]
    result_assets: list[ExecutorCoverageAssetBucket]
|
||||
|
||||
|
||||
class AdminGenerationJobEventResponse(BaseModel):
    # One entry of the `events` list in AdminGenerationJobTraceResponse.

    # Identifiers: the event's own id, its job id, and an optional story id.
    id: int
    job_id: str
    story_id: int | None = Field(default=None)

    # Event type and status, with an optional message.
    event_type: str
    status: str
    message: str | None = Field(default=None)

    # Free-form metadata; an empty dict is used when none is supplied.
    event_metadata: dict[str, Any] = Field(default_factory=dict)

    created_at: datetime
|
||||
|
||||
|
||||
class AdminGenerationJobTraceResponse(BaseModel):
    # Response schema of GET /generations/jobs/{job_id}/trace.

    # Identifiers; `story_id` is optional and defaults to None.
    id: str
    user_id: str
    story_id: int | None = Field(default=None)

    # Request modes.
    output_mode: str
    input_type: str

    # Status and progress fields.
    status: str
    current_step: str
    progress_percent: int
    progress_label: str
    is_terminal: bool

    # Action flags; both default to False when omitted.
    can_cancel: bool = Field(default=False)
    can_retry: bool = Field(default=False)

    # Free-form payloads default to empty dicts; the error text is optional.
    result_snapshot: dict[str, Any] = Field(default_factory=dict)
    error_message: str | None = Field(default=None)
    request_payload: dict[str, Any] = Field(default_factory=dict)

    # Required coverage summary and the event list (empty list when omitted).
    executor_coverage: ExecutorCoverageResponse
    events: list[AdminGenerationJobEventResponse] = Field(default_factory=list)

    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class HarnessReadinessCheck(BaseModel):
    # One entry of the `checks` list in HarnessReadinessResponse.
    code: str
    # Restricted to the same three values as HarnessReadinessResponse.status.
    status: Literal["ready", "needs_attention", "blocked"]
    message: str
    # Free-form details; an empty dict is used when none are supplied.
    details: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class HarnessReadinessGoldenReplay(BaseModel):
    # Payload of the `golden_replay` field in HarnessReadinessResponse.
    passed: bool
    total_cases: int
    failed_case_ids: list[str]
    # Two-level mapping of string keys to integer values; defaults to an
    # empty dict.
    # NOTE(review): the meaning of the outer and inner keys is not visible
    # here - confirm against the readiness service.
    coverage_summary: dict[str, dict[str, int]] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class HarnessReadinessThresholds(BaseModel):
    # Payload of the `thresholds` field in HarnessReadinessResponse.
    # All five values are required.

    # Integer minimums.
    min_runtime_evaluations: int
    min_executor_runs: int

    # Float minimums.
    min_evaluation_pass_rate: float
    min_evaluation_average_score: float
    min_executor_coverage_ratio: float
|
||||
|
||||
|
||||
class HarnessReadinessResponse(BaseModel):
    # Response schema of GET /harness/readiness.

    # Scope label and the optional window filter (None when omitted).
    scope: str
    window_days: int | None = Field(default=None)

    # Overall status, restricted to the same three values as each check.
    status: Literal["ready", "needs_attention", "blocked"]

    # Thresholds, individual checks and the golden-replay payload.
    thresholds: HarnessReadinessThresholds
    checks: list[HarnessReadinessCheck]
    golden_replay: HarnessReadinessGoldenReplay

    # Embedded copies of the two analytics response schemas.
    evaluation_analytics: EvaluationAnalyticsResponse
    executor_coverage: ExecutorCoverageResponse
|
||||
|
||||
|
||||
@router.get("/providers/adapters")
|
||||
async def list_available_adapters():
|
||||
"""获取所有可用的适配器类型 (定义的类)。"""
|
||||
@@ -137,6 +305,55 @@ async def get_provider_analytics(
|
||||
)
|
||||
|
||||
|
||||
@router.get("/evaluations/analytics", response_model=EvaluationAnalyticsResponse)
async def get_evaluation_analytics(
    days: int | None = Query(default=None, ge=1, le=365),
    artifact: Literal["story_text", "storybook_pages"] | None = Query(default=None),
    db: AsyncSession = Depends(get_db),
):
    """Return the internal content-evaluation summary; admin control plane only."""
    # Both filters are optional: `days` must be 1..365 when given, and
    # `artifact` is limited to the two literal values in the signature.
    analytics = await get_admin_evaluation_analytics(db, days=days, artifact=artifact)
    return analytics
|
||||
|
||||
|
||||
@router.get("/executors/coverage", response_model=ExecutorCoverageResponse)
async def get_executor_coverage(
    days: int | None = Query(default=None, ge=1, le=365),
    plan_mode: Literal["asset_generation", "asset_retry"] | None = Query(default=None),
    db: AsyncSession = Depends(get_db),
):
    """Return internal executor execution coverage; admin control plane only."""
    # Both filters are optional: `days` must be 1..365 when given, and
    # `plan_mode` is limited to the two literal values in the signature.
    coverage = await get_admin_executor_coverage(db, days=days, plan_mode=plan_mode)
    return coverage
|
||||
|
||||
|
||||
@router.get("/harness/readiness", response_model=HarnessReadinessResponse)
async def get_harness_readiness(
    days: int | None = Query(default=None, ge=1, le=365),
    db: AsyncSession = Depends(get_db),
):
    """Return the internal harness readiness review summary; admin control plane only."""
    # `days` is optional and must be 1..365 when given.
    readiness = await get_admin_harness_readiness(db, days=days)
    return readiness
|
||||
|
||||
|
||||
@router.get(
    "/generations/jobs/{job_id}/trace",
    response_model=AdminGenerationJobTraceResponse,
)
async def get_generation_job_trace(
    job_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Return the full internal generation trace; for admin control-plane troubleshooting and review only."""
    # NOTE(review): nothing here handles an unknown `job_id`; presumably the
    # service raises for that case - confirm.
    trace = await get_admin_generation_job_trace(db, job_id=job_id)
    return trace
|
||||
|
||||
|
||||
@router.get("/providers", response_model=list[ProviderResponse])
|
||||
async def list_providers(db: AsyncSession = Depends(get_db)):
|
||||
result = await db.execute(select(Provider))
|
||||
|
||||
@@ -24,6 +24,7 @@ from app.schemas.story_schemas import (
|
||||
GenerationProviderStatsResponse,
|
||||
GenerationRequest,
|
||||
GenerationResponse,
|
||||
GenerationTraceSummaryResponse,
|
||||
StoryAssetRetryRequest,
|
||||
StoryAudioStatusResponse,
|
||||
StorybookRequest,
|
||||
@@ -37,6 +38,7 @@ from app.services import story_service
|
||||
from app.services.generation_jobs import (
|
||||
get_generation_job_detail,
|
||||
get_story_provider_stats,
|
||||
get_story_trace_summary,
|
||||
get_user_generation_ops_summary,
|
||||
get_user_provider_analytics,
|
||||
list_story_generation_jobs,
|
||||
@@ -181,6 +183,25 @@ async def get_generation_provider_stats(
|
||||
)
|
||||
|
||||
|
||||
@router.get(
    "/generations/{story_id}/trace-summary",
    response_model=GenerationTraceSummaryResponse,
)
async def get_generation_trace_summary(
    story_id: int,
    days: int | None = Query(default=None, ge=1, le=365),
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Get workflow trace summary aggregated from generation job events."""
    # The authenticated user's id is forwarded to the service alongside the
    # story id; `days` is optional and must be 1..365 when given.
    summary = await get_story_trace_summary(
        db, story_id=story_id, user_id=user.id, days=days
    )
    return summary
|
||||
|
||||
|
||||
@router.get("/generations/{story_id}", response_model=StoryDetailResponse)
|
||||
async def get_generation(
|
||||
story_id: int,
|
||||
|
||||
Reference in New Issue
Block a user