Expand generation harness observability

This commit is contained in:
2026-06-24 10:48:23 +08:00
parent 459ca9edef
commit 1f34d80083
35 changed files with 8003 additions and 112 deletions

View File

@@ -0,0 +1,204 @@
"""Admin-only analytics for internal generation evaluation events."""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from typing import Any
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.models import GenerationJob, GenerationJobEvent
def _as_float(value: Any) -> float | None:
if isinstance(value, int | float):
return float(value)
return None
def _sorted_count_buckets(counts: dict[str, int], *, key_name: str) -> list[dict[str, Any]]:
return [
{key_name: name, "count": count}
for name, count in sorted(
counts.items(),
key=lambda item: (-item[1], item[0]),
)
]
def _average_bucket(
totals: dict[str, float],
counts: dict[str, int],
*,
key_name: str,
) -> list[dict[str, Any]]:
rows = [
{
key_name: name,
"average_score": round(totals[name] / counts[name], 4),
"count": counts[name],
}
for name in totals
if counts.get(name)
]
rows.sort(key=lambda item: (-int(item["count"]), str(item[key_name])))
return rows
def _score_band(score: float) -> str:
if score >= 0.9:
return "excellent"
if score >= 0.8:
return "good"
if score >= 0.7:
return "pass"
if score > 0:
return "blocked_low_score"
return "blocked_quality_gate"
def _metadata_scores(metadata: dict[str, Any]) -> list[dict[str, Any]]:
raw_scores = metadata.get("scores")
if not isinstance(raw_scores, list):
return []
return [score for score in raw_scores if isinstance(score, dict)]
def _quality_gate_issues(metadata: dict[str, Any]) -> list[dict[str, Any]]:
quality_gate = metadata.get("quality_gate")
if not isinstance(quality_gate, dict):
return []
raw_issues = quality_gate.get("issues")
if not isinstance(raw_issues, list):
return []
return [issue for issue in raw_issues if isinstance(issue, dict)]
async def get_admin_evaluation_analytics(
    db: AsyncSession,
    *,
    days: int | None = None,
    artifact: str | None = None,
) -> dict[str, Any]:
    """Summarise ``evaluation_completed`` events for the admin control plane.

    Args:
        db: Async session used to load events joined with their jobs.
        days: When given, only events created within the last ``days`` days
            are loaded; ``None`` loads every matching event.
        artifact: When given, only events whose metadata ``artifact`` equals
            this value are counted. The filter runs in Python after loading,
            and a missing artifact is treated as ``"unknown"``.

    Returns:
        A dict with totals, pass rate, average score, distinct job/story/user
        counts and count buckets sorted by descending count.
    """

    def bump(bucket: dict[str, int], label: str) -> None:
        bucket[label] = bucket.get(label, 0) + 1

    statement = (
        select(GenerationJobEvent, GenerationJob)
        .join(GenerationJob, GenerationJobEvent.job_id == GenerationJob.id)
        .where(GenerationJobEvent.event_type == "evaluation_completed")
        .order_by(GenerationJobEvent.id)
    )
    if days is not None:
        window_start = datetime.now(timezone.utc) - timedelta(days=days)
        statement = statement.where(GenerationJobEvent.created_at >= window_start)
    loaded = (await db.execute(statement)).all()

    evaluation_total = 0
    passed_total = 0
    blocked_total = 0
    score_sum = 0.0
    scored_events = 0
    seen_jobs: set[str] = set()
    seen_stories: set[int] = set()
    seen_users: set[str] = set()
    per_artifact: dict[str, int] = {}
    per_output_mode: dict[str, int] = {}
    per_band: dict[str, int] = {}
    dimension_sums: dict[str, float] = {}
    dimension_samples: dict[str, int] = {}
    gate_code_hits: dict[str, int] = {}
    failure_category_hits: dict[str, int] = {}
    warning_hits: dict[str, int] = {}

    for event, job in loaded:
        meta = event.event_metadata or {}
        artifact_label = str(meta.get("artifact") or "unknown")
        if artifact is not None and artifact_label != artifact:
            continue

        evaluation_total += 1
        seen_jobs.add(job.id)
        seen_users.add(job.user_id)
        # The event's own story id wins; the job's is only a fallback.
        story_ref = event.story_id if event.story_id is not None else job.story_id
        if story_ref is not None:
            seen_stories.add(int(story_ref))
        bump(per_artifact, artifact_label)
        bump(per_output_mode, job.output_mode)

        # Only a literal True counts; truthy non-bool values do not.
        if meta.get("passed") is True:
            passed_total += 1
        if meta.get("blocking") is True:
            blocked_total += 1

        overall = _as_float(meta.get("overall_score"))
        if overall is not None:
            score_sum += overall
            scored_events += 1
            bump(per_band, _score_band(overall))

        for entry in _metadata_scores(meta):
            dimension_name = entry.get("dimension")
            dimension_value = _as_float(entry.get("score"))
            if isinstance(dimension_name, str) and dimension_value is not None:
                dimension_sums[dimension_name] = (
                    dimension_sums.get(dimension_name, 0.0) + dimension_value
                )
                bump(dimension_samples, dimension_name)

        for issue in _quality_gate_issues(meta):
            issue_code = issue.get("code")
            if isinstance(issue_code, str) and issue_code:
                bump(gate_code_hits, issue_code)
            issue_category = issue.get("failure_category")
            if isinstance(issue_category, str) and issue_category:
                bump(failure_category_hits, issue_category)

        raw_warnings = meta.get("warnings")
        if isinstance(raw_warnings, list):
            for text in raw_warnings:
                if isinstance(text, str) and text:
                    bump(warning_hits, text)

    if evaluation_total:
        pass_rate = round(passed_total / evaluation_total, 4)
    else:
        pass_rate = 0.0
    # The average covers only events that carried a numeric overall score.
    average_score = round(score_sum / scored_events, 4) if scored_events else None

    return {
        "scope": "admin_internal_evaluations",
        "window_days": days,
        "artifact": artifact,
        "total_evaluations": evaluation_total,
        "passed_evaluations": passed_total,
        "blocked_evaluations": blocked_total,
        "pass_rate": pass_rate,
        "average_score": average_score,
        "job_count": len(seen_jobs),
        "story_count": len(seen_stories),
        "user_count": len(seen_users),
        "by_artifact": _sorted_count_buckets(per_artifact, key_name="artifact"),
        "by_output_mode": _sorted_count_buckets(per_output_mode, key_name="output_mode"),
        "score_bands": _sorted_count_buckets(per_band, key_name="band"),
        "dimension_scores": _average_bucket(
            dimension_sums,
            dimension_samples,
            key_name="dimension",
        ),
        "quality_gate_issues": _sorted_count_buckets(gate_code_hits, key_name="code"),
        "failure_categories": _sorted_count_buckets(
            failure_category_hits,
            key_name="category",
        ),
        "warnings": _sorted_count_buckets(warning_hits, key_name="message"),
    }

View File

@@ -0,0 +1,147 @@
"""Admin-only analytics for internal workflow executor coverage."""
from __future__ import annotations
from collections.abc import Iterable
from datetime import datetime, timedelta, timezone
from typing import Any
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.models import GenerationJob, GenerationJobEvent
def _as_int(value: Any) -> int:
if isinstance(value, bool):
return int(value)
if isinstance(value, int):
return value
if isinstance(value, float):
return int(value)
return 0
def _sorted_count_buckets(counts: dict[str, int], *, key_name: str) -> list[dict[str, Any]]:
return [
{key_name: name, "count": count}
for name, count in sorted(
counts.items(),
key=lambda item: (-item[1], item[0]),
)
]
def _iter_strings(value: Any) -> Iterable[str]:
if not isinstance(value, list | tuple | set):
return
for item in value:
if isinstance(item, str) and item:
yield item
def summarize_executor_coverage_rows(
    rows: Iterable[tuple[GenerationJobEvent, GenerationJob]],
    *,
    days: int | None = None,
    plan_mode: str | None = None,
    scope: str = "admin_internal_executor_coverage",
) -> dict[str, Any]:
    """Fold ``(event, job)`` rows into an executor coverage summary.

    Args:
        rows: Pairs of an executor event and the job it belongs to.
        days: Echoed back as ``window_days``; it is not used for filtering
            here.
        plan_mode: When given, only rows whose metadata ``plan_mode`` equals
            this value are counted (a missing mode is ``"unknown"``).
        scope: Label stored under ``scope`` in the result.

    Returns:
        A dict with run and task totals, the executed/planned coverage ratio,
        distinct job/story/user counts and sorted count buckets.
    """

    def bump(bucket: dict[str, int], label: str) -> None:
        bucket[label] = bucket.get(label, 0) + 1

    run_total = 0
    planned_total = 0
    executed_total = 0
    ignored_total = 0
    seen_jobs: set[str] = set()
    seen_stories: set[int] = set()
    seen_users: set[str] = set()
    per_plan_mode: dict[str, int] = {}
    per_output_mode: dict[str, int] = {}
    executed_key_hits: dict[str, int] = {}
    ignored_key_hits: dict[str, int] = {}
    asset_hits: dict[str, int] = {}
    # Metadata list fields and the bucket each one feeds.
    string_list_fields = (
        ("executed_task_keys", executed_key_hits),
        ("ignored_task_keys", ignored_key_hits),
        ("result_assets", asset_hits),
    )

    for event, job in rows:
        meta = event.event_metadata or {}
        mode_label = str(meta.get("plan_mode") or "unknown")
        if plan_mode is not None and mode_label != plan_mode:
            continue

        run_total += 1
        seen_jobs.add(job.id)
        seen_users.add(job.user_id)
        # The event's own story id wins; the job's is only a fallback.
        story_ref = event.story_id if event.story_id is not None else job.story_id
        if story_ref is not None:
            seen_stories.add(int(story_ref))
        bump(per_plan_mode, mode_label)
        bump(per_output_mode, job.output_mode)

        planned_total += _as_int(meta.get("planned_task_count"))
        executed_total += _as_int(meta.get("executed_task_count"))
        ignored_total += _as_int(meta.get("ignored_task_count"))

        for field_name, bucket in string_list_fields:
            for label in _iter_strings(meta.get(field_name)):
                bump(bucket, label)

    if planned_total:
        coverage_ratio = round(executed_total / planned_total, 4)
    else:
        # No planned tasks at all: report zero rather than dividing by zero.
        coverage_ratio = 0.0

    return {
        "scope": scope,
        "window_days": days,
        "plan_mode": plan_mode,
        "total_runs": run_total,
        "total_planned_tasks": planned_total,
        "total_executed_tasks": executed_total,
        "total_ignored_tasks": ignored_total,
        "coverage_ratio": coverage_ratio,
        "job_count": len(seen_jobs),
        "story_count": len(seen_stories),
        "user_count": len(seen_users),
        "by_plan_mode": _sorted_count_buckets(per_plan_mode, key_name="plan_mode"),
        "by_output_mode": _sorted_count_buckets(per_output_mode, key_name="output_mode"),
        "executed_task_keys": _sorted_count_buckets(executed_key_hits, key_name="task_key"),
        "ignored_task_keys": _sorted_count_buckets(ignored_key_hits, key_name="task_key"),
        "result_assets": _sorted_count_buckets(asset_hits, key_name="asset"),
    }
async def get_admin_executor_coverage(
    db: AsyncSession,
    *,
    days: int | None = None,
    plan_mode: str | None = None,
) -> dict[str, Any]:
    """Load ``executor_completed`` events and summarise executor coverage.

    Args:
        db: Async session used to load events joined with their jobs.
        days: When given, only events created within the last ``days`` days
            are loaded.
        plan_mode: Forwarded to ``summarize_executor_coverage_rows`` as its
            plan-mode filter.

    Returns:
        The summary produced by ``summarize_executor_coverage_rows``.
    """
    statement = (
        select(GenerationJobEvent, GenerationJob)
        .join(GenerationJob, GenerationJobEvent.job_id == GenerationJob.id)
        .where(GenerationJobEvent.event_type == "executor_completed")
        .order_by(GenerationJobEvent.id)
    )
    if days is not None:
        window_start = datetime.now(timezone.utc) - timedelta(days=days)
        statement = statement.where(GenerationJobEvent.created_at >= window_start)
    result = await db.execute(statement)
    return summarize_executor_coverage_rows(result.all(), days=days, plan_mode=plan_mode)

View File

@@ -0,0 +1,52 @@
"""Admin-only generation trace detail service."""
from __future__ import annotations
from typing import Any
from fastapi import HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.models import GenerationJob, GenerationJobEvent
from app.services.admin_executor_coverage import summarize_executor_coverage_rows
from app.services.generation_jobs import (
generation_event_to_response,
generation_job_to_summary,
)
async def get_admin_generation_job_trace(
    db: AsyncSession,
    *,
    job_id: str,
) -> dict[str, Any]:
    """Assemble the full internal trace of one generation job for admins.

    Args:
        db: Async session used to load the job and its events.
        job_id: Identifier of the job to trace.

    Returns:
        The job summary extended with ``user_id``, the raw
        ``request_payload``, an executor coverage summary built from the
        job's ``executor_completed`` events, and every event in id order.

    Raises:
        HTTPException: 404 when no job with ``job_id`` exists.
    """
    job_result = await db.execute(select(GenerationJob).where(GenerationJob.id == job_id))
    job = job_result.scalar_one_or_none()
    if job is None:
        raise HTTPException(status_code=404, detail="Generation job not found")

    event_result = await db.execute(
        select(GenerationJobEvent)
        .where(GenerationJobEvent.job_id == job.id)
        .order_by(GenerationJobEvent.id)
    )
    events = event_result.scalars().all()

    # The coverage summariser expects (event, job) pairs.
    executor_rows = []
    for event in events:
        if event.event_type == "executor_completed":
            executor_rows.append((event, job))

    trace: dict[str, Any] = dict(generation_job_to_summary(job))
    trace["user_id"] = job.user_id
    trace["request_payload"] = job.request_payload or {}
    trace["executor_coverage"] = summarize_executor_coverage_rows(
        executor_rows,
        scope="admin_internal_job_executor_coverage",
    )
    trace["events"] = [generation_event_to_response(event) for event in events]
    return trace

View File

@@ -0,0 +1,262 @@
"""Admin-only readiness audit for harness-driven generation."""
from __future__ import annotations
from pathlib import Path
from typing import Any
from sqlalchemy.ext.asyncio import AsyncSession
from app.services.admin_evaluation_analytics import get_admin_evaluation_analytics
from app.services.admin_executor_coverage import get_admin_executor_coverage
from app.services.harness.evaluation_replay import replay_evaluation_golden_cases
# Golden evaluation fixture, resolved relative to this module's directory
# (<module dir>/harness/fixtures/evaluation_golden_cases.json).
_GOLDEN_CASES_PATH = (
Path(__file__).resolve().parent
/ "harness"
/ "fixtures"
/ "evaluation_golden_cases.json"
)
# Readiness thresholds used by the check helpers in this module.
# Minimum number of runtime evaluations before the sample check is "ready".
_MIN_RUNTIME_EVALUATIONS = 1
# Minimum number of executor runs before the sample check is "ready".
_MIN_EXECUTOR_RUNS = 1
# A pass rate below this value blocks the runtime quality check.
_MIN_EVALUATION_PASS_RATE = 0.7
# An average score below this value (when one exists) blocks the quality check.
_MIN_EVALUATION_AVERAGE_SCORE = 0.7
# A coverage ratio below this value blocks the executor ratio check.
_MIN_EXECUTOR_COVERAGE_RATIO = 0.2
def _check(
*,
code: str,
status: str,
message: str,
details: dict[str, Any] | None = None,
) -> dict[str, Any]:
return {
"code": code,
"status": status,
"message": message,
"details": details or {},
}
def _overall_status(checks: list[dict[str, Any]]) -> str:
statuses = {check["status"] for check in checks}
if "blocked" in statuses:
return "blocked"
if "needs_attention" in statuses:
return "needs_attention"
return "ready"
def _run_golden_replay() -> dict[str, Any]:
    """Replay the golden evaluation fixture and return a plain-dict summary.

    Returns:
        A dict with ``passed``, ``total_cases``, ``failed_case_ids`` and
        ``coverage_summary``. When the fixture file does not exist, a failed
        summary with the single case id ``"fixture_missing"`` is returned
        instead of running the replay.
    """
    if _GOLDEN_CASES_PATH.exists():
        outcome = replay_evaluation_golden_cases(_GOLDEN_CASES_PATH)
        summary: dict[str, Any] = {}
        summary["passed"] = outcome.passed
        summary["total_cases"] = len(outcome.cases)
        summary["failed_case_ids"] = list(outcome.failed_case_ids)
        summary["coverage_summary"] = outcome.coverage_summary()
        return summary
    return {
        "passed": False,
        "total_cases": 0,
        "failed_case_ids": ["fixture_missing"],
        "coverage_summary": {},
    }
def _golden_replay_check(golden_replay: dict[str, Any]) -> dict[str, Any]:
    """Turn a golden replay summary into a readiness check.

    The check is ``ready`` only when the replay passed and ran at least one
    case; otherwise it is ``blocked`` and lists the failing case ids.
    """
    replay_ok = golden_replay["passed"] and golden_replay["total_cases"] > 0
    details: dict[str, Any] = {
        "total_cases": golden_replay["total_cases"],
        "failed_case_count": len(golden_replay["failed_case_ids"]),
    }
    if not replay_ok:
        details["failed_case_ids"] = golden_replay["failed_case_ids"]
        return _check(
            code="golden_replay",
            status="blocked",
            message="内部 golden replay 未通过,暂停扩大 harness 接管范围。",
            details=details,
        )
    return _check(
        code="golden_replay",
        status="ready",
        message="内部 golden replay 全部通过。",
        details=details,
    )
def _evaluation_sample_check(evaluation_analytics: dict[str, Any]) -> dict[str, Any]:
    """Check that the window holds enough runtime evaluation samples.

    ``ready`` when ``total_evaluations`` reaches ``_MIN_RUNTIME_EVALUATIONS``,
    otherwise ``needs_attention``. Both outcomes report the same details.
    """
    sample_total = int(evaluation_analytics["total_evaluations"])
    details = {
        "total_evaluations": sample_total,
        "min_required": _MIN_RUNTIME_EVALUATIONS,
    }
    if sample_total < _MIN_RUNTIME_EVALUATIONS:
        return _check(
            code="runtime_evaluation_samples",
            status="needs_attention",
            message="当前窗口缺少内部 evaluation 运行样本,建议先跑生成烟测。",
            details=details,
        )
    return _check(
        code="runtime_evaluation_samples",
        status="ready",
        message="当前窗口已有内部 evaluation 运行样本。",
        details=details,
    )
def _evaluation_quality_check(evaluation_analytics: dict[str, Any]) -> dict[str, Any]:
    """Check runtime evaluation quality against the readiness thresholds.

    Outcomes:
        * ``needs_attention`` when there are no evaluations at all.
        * ``blocked`` when the pass rate is below the minimum, or an average
          score exists and is below the minimum.
        * ``ready`` otherwise (a missing average score does not block).
    """
    sample_total = int(evaluation_analytics["total_evaluations"])
    observed_pass_rate = float(evaluation_analytics["pass_rate"])
    observed_average = evaluation_analytics["average_score"]

    if sample_total == 0:
        return _check(
            code="runtime_evaluation_quality",
            status="needs_attention",
            message="暂无运行期 evaluation 质量样本。",
            details={
                "total_evaluations": sample_total,
                "min_pass_rate": _MIN_EVALUATION_PASS_RATE,
                "min_average_score": _MIN_EVALUATION_AVERAGE_SCORE,
            },
        )

    below_threshold = observed_pass_rate < _MIN_EVALUATION_PASS_RATE or (
        observed_average is not None
        and float(observed_average) < _MIN_EVALUATION_AVERAGE_SCORE
    )
    measured = {
        "pass_rate": observed_pass_rate,
        "average_score": observed_average,
        "blocked_evaluations": evaluation_analytics["blocked_evaluations"],
    }
    if below_threshold:
        return _check(
            code="runtime_evaluation_quality",
            status="blocked",
            message="运行期 evaluation 质量未达到内部 readiness 门槛。",
            details={
                **measured,
                "min_pass_rate": _MIN_EVALUATION_PASS_RATE,
                "min_average_score": _MIN_EVALUATION_AVERAGE_SCORE,
            },
        )
    return _check(
        code="runtime_evaluation_quality",
        status="ready",
        message="运行期 evaluation 通过率和平均分达到内部 readiness 门槛。",
        details=measured,
    )
def _executor_sample_check(executor_coverage: dict[str, Any]) -> dict[str, Any]:
    """Check that the window holds enough executor coverage runs.

    ``ready`` when ``total_runs`` reaches ``_MIN_EXECUTOR_RUNS``, otherwise
    ``needs_attention``. Both outcomes report the same details.
    """
    run_total = int(executor_coverage["total_runs"])
    details = {
        "total_runs": run_total,
        "min_required": _MIN_EXECUTOR_RUNS,
    }
    if run_total < _MIN_EXECUTOR_RUNS:
        return _check(
            code="executor_coverage_samples",
            status="needs_attention",
            message="当前窗口缺少 executor coverage 样本,建议先跑资产生成或重试烟测。",
            details=details,
        )
    return _check(
        code="executor_coverage_samples",
        status="ready",
        message="当前窗口已有 executor coverage 运行样本。",
        details=details,
    )
def _executor_ratio_check(executor_coverage: dict[str, Any]) -> dict[str, Any]:
    """Check the executor coverage ratio against the readiness threshold.

    Outcomes:
        * ``needs_attention`` when there are no executor runs.
        * ``blocked`` when the ratio is below
          ``_MIN_EXECUTOR_COVERAGE_RATIO``.
        * ``ready`` otherwise.
    """
    run_total = int(executor_coverage["total_runs"])
    observed_ratio = float(executor_coverage["coverage_ratio"])

    if run_total == 0:
        return _check(
            code="executor_coverage_ratio",
            status="needs_attention",
            message="暂无 executor coverage 运行样本。",
            details={
                "total_runs": run_total,
                "min_coverage_ratio": _MIN_EXECUTOR_COVERAGE_RATIO,
            },
        )

    if not observed_ratio < _MIN_EXECUTOR_COVERAGE_RATIO:
        return _check(
            code="executor_coverage_ratio",
            status="ready",
            message="executor coverage ratio 达到内部 readiness 门槛。",
            details={
                "coverage_ratio": observed_ratio,
                "total_planned_tasks": executor_coverage["total_planned_tasks"],
                "total_executed_tasks": executor_coverage["total_executed_tasks"],
            },
        )
    return _check(
        code="executor_coverage_ratio",
        status="blocked",
        message="executor coverage ratio 未达到内部 readiness 门槛。",
        details={
            "coverage_ratio": observed_ratio,
            "min_coverage_ratio": _MIN_EXECUTOR_COVERAGE_RATIO,
            "total_planned_tasks": executor_coverage["total_planned_tasks"],
            "total_executed_tasks": executor_coverage["total_executed_tasks"],
        },
    )
async def get_admin_harness_readiness(
    db: AsyncSession,
    *,
    days: int | None = None,
) -> dict[str, Any]:
    """Run every readiness check and bundle the results for admins.

    Args:
        db: Async session passed to the analytics and coverage services.
        days: Optional look-back window forwarded to both services.

    Returns:
        A dict with the overall ``status``, the thresholds in force, the
        individual ``checks`` and the raw inputs they were derived from.
    """
    golden_replay = _run_golden_replay()
    evaluation_analytics = await get_admin_evaluation_analytics(db, days=days)
    executor_coverage = await get_admin_executor_coverage(db, days=days)

    checks: list[dict[str, Any]] = [_golden_replay_check(golden_replay)]
    for build_check in (_evaluation_sample_check, _evaluation_quality_check):
        checks.append(build_check(evaluation_analytics))
    for build_check in (_executor_sample_check, _executor_ratio_check):
        checks.append(build_check(executor_coverage))

    thresholds = {
        "min_runtime_evaluations": _MIN_RUNTIME_EVALUATIONS,
        "min_executor_runs": _MIN_EXECUTOR_RUNS,
        "min_evaluation_pass_rate": _MIN_EVALUATION_PASS_RATE,
        "min_evaluation_average_score": _MIN_EVALUATION_AVERAGE_SCORE,
        "min_executor_coverage_ratio": _MIN_EXECUTOR_COVERAGE_RATIO,
    }
    return {
        "scope": "admin_internal_harness_readiness",
        "window_days": days,
        "status": _overall_status(checks),
        "thresholds": thresholds,
        "checks": checks,
        "golden_replay": golden_replay,
        "evaluation_analytics": evaluation_analytics,
        "executor_coverage": executor_coverage,
    }

View File

@@ -90,11 +90,13 @@ def _job_progress(job: GenerationJob) -> dict[str, Any]:
progress_map: dict[str, tuple[int, str]] = {
"request_accepted": (5, "已接收请求"),
"workflow_planned": (8, "工作流已规划"),
"retry_queued": (8, "重新排队中"),
"worker_started": (12, "后台任务已开始"),
"cancel_requested": (15, "已请求取消"),
"context_prepared": (20, "上下文已准备"),
"narrative_generated": (45, "正文已生成"),
"evaluation_completed": (52, "内容评测已完成"),
"story_saved": (60, "主记录已保存"),
"provider_call_started": (65, "Provider 调用中"),
"provider_call_succeeded": (72, "Provider 调用成功"),
@@ -307,6 +309,137 @@ def generation_event_to_response(event: GenerationJobEvent) -> dict[str, Any]:
}
# Allow-list of event metadata keys that public_generation_event_metadata
# copies into user-facing responses; keys not listed here are left out.
_PUBLIC_EVENT_METADATA_KEYS = {
"adapter",
"artifact",
"asset",
"assets",
"attempted_cover",
"audio_status",
"blocks_main_result",
"capability",
"completed_pages",
"cover_prompt_present",
"estimated_cost_usd",
"failed_pages",
"failure_category",
"generation_status",
"has_memory_context",
"image_status",
"input_type",
"latency_ms",
"mode",
"output_mode",
"page_count",
"page_number",
"recoverable",
"requested_from_step",
"retryable",
"scope",
"stale_after_minutes",
"status",
"step",
"strategy",
"text_status",
}
# Allow-list of request payload keys that public_generation_request_payload
# copies into user-facing job details; keys not listed here are left out.
_PUBLIC_REQUEST_PAYLOAD_KEYS = {
"assets",
"child_profile_id",
"generate_images",
"input_type",
"output_mode",
"page_count",
"story_id",
"type",
"universe_id",
}
def _public_metadata_value(value: Any) -> Any:
"""Return a JSON-safe public value or None when the value is internal."""
if isinstance(value, str | int | float | bool) or value is None:
return value
if isinstance(value, list):
public_items = [
item
for item in value
if isinstance(item, str | int | float | bool) or item is None
]
return public_items
return None
def public_generation_request_payload(job: GenerationJob) -> dict[str, Any]:
    """Return the allow-listed part of a job's request payload.

    Only keys in ``_PUBLIC_REQUEST_PAYLOAD_KEYS`` are considered, each value
    is passed through ``_public_metadata_value``, and entries that come back
    as ``None`` are omitted. Keys appear in sorted order.
    """
    raw_payload = job.request_payload or {}
    sanitized = (
        (field_name, _public_metadata_value(raw_payload[field_name]))
        for field_name in sorted(_PUBLIC_REQUEST_PAYLOAD_KEYS)
        if field_name in raw_payload
    )
    return {field_name: value for field_name, value in sanitized if value is not None}
def _public_plan_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
"""Expose only coarse workflow plan metadata to user-facing responses."""
plan = metadata.get("plan")
if not isinstance(plan, dict):
return {}
public: dict[str, Any] = {}
mode = plan.get("mode")
if isinstance(mode, str):
public["plan_mode"] = mode
tasks = plan.get("tasks")
if isinstance(tasks, list):
public["planned_task_count"] = len(tasks)
public["recoverable_task_count"] = sum(
1
for task in tasks
if isinstance(task, dict) and task.get("recoverable") is True
)
return public
def public_generation_event_metadata(event: GenerationJobEvent) -> dict[str, Any]:
    """Return the allow-listed part of an event's metadata.

    Keys in ``_PUBLIC_EVENT_METADATA_KEYS`` are copied after passing through
    ``_public_metadata_value`` (``None`` results are omitted). For
    ``workflow_planned`` events the coarse plan summary from
    ``_public_plan_metadata`` is merged on top.
    """
    raw_metadata = event.event_metadata or {}
    sanitized = (
        (field_name, _public_metadata_value(raw_metadata[field_name]))
        for field_name in sorted(_PUBLIC_EVENT_METADATA_KEYS)
        if field_name in raw_metadata
    )
    exposed: dict[str, Any] = {
        field_name: value for field_name, value in sanitized if value is not None
    }
    if event.event_type == "workflow_planned":
        exposed.update(_public_plan_metadata(raw_metadata))
    return exposed
def public_generation_event_to_response(event: GenerationJobEvent) -> dict[str, Any] | None:
    """Build the user-facing response for an event, or ``None`` to hide it.

    ``evaluation_completed`` and ``executor_completed`` events are internal
    and produce ``None``. Every other event gets the regular response with
    its ``event_metadata`` replaced by the public subset.
    """
    hidden_event_types = {"evaluation_completed", "executor_completed"}
    if event.event_type not in hidden_event_types:
        payload = generation_event_to_response(event)
        payload["event_metadata"] = public_generation_event_metadata(event)
        return payload
    return None
def generation_job_to_summary(job: GenerationJob) -> dict[str, Any]:
"""Convert a generation job ORM object to an API summary dict."""
@@ -328,6 +461,23 @@ def generation_job_to_summary(job: GenerationJob) -> dict[str, Any]:
}
def public_generation_job_to_summary(job: GenerationJob) -> dict[str, Any]:
    """Build a job summary for user-facing APIs with internal steps masked.

    When the job's current step is an internal one, the summary is rewritten
    to a public step and marked non-terminal:
    ``evaluation_completed`` is shown as ``narrative_generated`` (45%) and
    ``executor_completed`` as ``workflow_planned`` (8%).
    """
    summary = generation_job_to_summary(job)
    # (internal step, public step, progress percent, progress label)
    masks = (
        ("evaluation_completed", "narrative_generated", 45, "正文已生成"),
        ("executor_completed", "workflow_planned", 8, "工作流已规划"),
    )
    for internal_step, public_step, percent, label in masks:
        if summary["current_step"] == internal_step:
            summary["current_step"] = public_step
            summary["progress_percent"] = percent
            summary["progress_label"] = label
            summary["is_terminal"] = False
            break
    return summary
async def get_generation_job_for_user(
db: AsyncSession,
*,
@@ -362,13 +512,13 @@ async def request_generation_job_cancel(
raise HTTPException(status_code=409, detail="当前任务不支持取消")
if job.status == "canceled":
return generation_job_to_summary(job)
return public_generation_job_to_summary(job)
if _is_terminal_status(job.status):
raise HTTPException(status_code=409, detail="当前任务已终止,无法取消")
if job.current_step == "cancel_requested":
return generation_job_to_summary(job)
return public_generation_job_to_summary(job)
if job.current_step in {"request_accepted", "retry_queued"}:
story = None
@@ -391,7 +541,7 @@ async def request_generation_job_cancel(
error_message="Generation canceled by user before worker execution started.",
message="Generation job was canceled before worker execution started.",
)
return generation_job_to_summary(job)
return public_generation_job_to_summary(job)
previous_step = job.current_step
job.error_message = "Cancellation requested by user."
@@ -407,7 +557,7 @@ async def request_generation_job_cancel(
)
await db.commit()
await db.refresh(job)
return generation_job_to_summary(job)
return public_generation_job_to_summary(job)
async def get_generation_job_detail(
@@ -437,9 +587,13 @@ async def get_generation_job_detail(
).scalars().all()
return {
**generation_job_to_summary(job),
"request_payload": job.request_payload or {},
"events": [generation_event_to_response(event) for event in events],
**public_generation_job_to_summary(job),
"request_payload": public_generation_request_payload(job),
"events": [
response
for event in events
if (response := public_generation_event_to_response(event)) is not None
],
}
@@ -461,7 +615,7 @@ async def list_story_generation_jobs(
.order_by(desc(GenerationJob.created_at), desc(GenerationJob.id))
)
).scalars().all()
return [generation_job_to_summary(job) for job in jobs]
return [public_generation_job_to_summary(job) for job in jobs]
async def get_active_story_generation_job(
@@ -513,6 +667,59 @@ def _as_float(value: Any) -> float | None:
return None
def _sorted_buckets(counts: dict[str, int]) -> list[dict[str, Any]]:
return [
{"name": name, "count": count}
for name, count in sorted(
counts.items(),
key=lambda item: (-item[1], item[0]),
)
]
def _aggregate_trace_events(events: list[GenerationJobEvent]) -> dict[str, Any]:
    """Count trace metadata across events, ignoring internal event types.

    ``evaluation_completed`` and ``executor_completed`` events are skipped
    entirely. For the rest, occurrences are counted per metadata ``step``
    and ``artifact`` (``"none"`` is not counted), and failed events are
    counted per ``failure_category`` with ``"unknown_error"`` as fallback.

    Returns:
        A dict with ``total_events``, ``failed_events`` and the sorted
        ``by_step``, ``by_artifact`` and ``failure_categories`` buckets.
    """

    def bump(bucket: dict[str, int], label: str) -> None:
        bucket[label] = bucket.get(label, 0) + 1

    step_hits: dict[str, int] = {}
    artifact_hits: dict[str, int] = {}
    category_hits: dict[str, int] = {}
    failed_total = 0
    event_total = 0

    for event in events:
        if event.event_type in {"evaluation_completed", "executor_completed"}:
            continue
        event_total += 1
        meta = event.event_metadata or {}

        step_name = meta.get("step")
        if isinstance(step_name, str) and step_name:
            bump(step_hits, step_name)

        artifact_name = meta.get("artifact")
        if isinstance(artifact_name, str) and artifact_name and artifact_name != "none":
            bump(artifact_hits, artifact_name)

        if event.status != "failed":
            continue
        failed_total += 1
        reported_category = meta.get("failure_category")
        if isinstance(reported_category, str) and reported_category:
            bump(category_hits, reported_category)
        else:
            bump(category_hits, "unknown_error")

    return {
        "total_events": event_total,
        "failed_events": failed_total,
        "by_step": _sorted_buckets(step_hits),
        "by_artifact": _sorted_buckets(artifact_hits),
        "failure_categories": _sorted_buckets(category_hits),
    }
def _aggregate_provider_events(
events: list[GenerationJobEvent],
*,
@@ -679,6 +886,38 @@ async def get_story_provider_stats(
}
async def get_story_trace_summary(
    db: AsyncSession,
    *,
    story_id: int,
    user_id: str,
    days: int | None = None,
) -> dict[str, Any]:
    """Summarise trace metadata over one user's jobs for a single story.

    Args:
        db: Async session used to load the events.
        story_id: Story whose jobs are inspected.
        user_id: Owner the jobs must belong to.
        days: When given, only events created within the last ``days`` days
            are included.

    Returns:
        ``story_id`` and ``window_days`` followed by the aggregate produced
        by ``_aggregate_trace_events``.
    """
    conditions = [
        GenerationJob.story_id == story_id,
        GenerationJob.user_id == user_id,
    ]
    if days is not None:
        window_start = datetime.now(timezone.utc) - timedelta(days=days)
        conditions.append(GenerationJobEvent.created_at >= window_start)

    statement = (
        select(GenerationJobEvent)
        .join(GenerationJob, GenerationJobEvent.job_id == GenerationJob.id)
        .where(*conditions)
        .order_by(GenerationJobEvent.id)
    )
    result = await db.execute(statement)
    events = result.scalars().all()

    summary: dict[str, Any] = {"story_id": story_id, "window_days": days}
    summary.update(_aggregate_trace_events(events))
    return summary
async def get_user_provider_analytics(
db: AsyncSession,
*,

View File

@@ -0,0 +1,322 @@
"""Internal golden-case replay support for harness evaluations.
The replay helpers are intentionally not wired to user-facing APIs. They exist
to make evaluation behavior reproducible in tests and internal tooling.
"""
import json
from collections import Counter
from dataclasses import dataclass, field
from enum import StrEnum
from pathlib import Path
from typing import Any, Iterable
from app.services.adapters.storybook.primary import Storybook, StorybookPage
from app.services.adapters.text.models import StoryOutput
from app.services.harness.evaluators import (
EvaluationDimension,
EvaluationResult,
evaluate_story_output,
evaluate_storybook_output,
)
class EvaluationReplayArtifact(StrEnum):
"""Artifacts supported by deterministic evaluation replay."""
STORY = "story"
STORYBOOK = "storybook"
@dataclass(frozen=True)
class ExpectedEvaluation:
    """Outcome that a golden case expects from the evaluator."""

    # Expected values of the evaluation's pass / blocking flags.
    passed: bool
    blocking: bool
    # Optional bounds on the overall score; None leaves that side unset.
    min_overall_score: float | None = None
    max_overall_score: float | None = None
    # Expectation lists; how each is compared is decided by the comparison
    # helper used in EvaluationReplayCase.replay.
    required_dimensions: tuple[EvaluationDimension, ...] = field(default_factory=tuple)
    quality_gate_codes: tuple[str, ...] = field(default_factory=tuple)
    warning_substrings: tuple[str, ...] = field(default_factory=tuple)

    @classmethod
    def from_payload(cls, payload: dict[str, Any]) -> "ExpectedEvaluation":
        """Create expectations from a decoded JSON object.

        ``passed`` and ``blocking`` are required keys; every other key is
        optional and falls back to the field default.

        Raises:
            KeyError: If ``passed`` or ``blocking`` is missing.
            ValueError: If a required dimension is not a valid
                ``EvaluationDimension`` value.
        """
        passed_flag = bool(payload["passed"])
        blocking_flag = bool(payload["blocking"])
        score_floor = payload.get("min_overall_score")
        score_ceiling = payload.get("max_overall_score")
        dimensions = tuple(
            map(EvaluationDimension, payload.get("required_dimensions", []))
        )
        gate_codes = tuple(payload.get("quality_gate_codes", []))
        warning_parts = tuple(payload.get("warning_substrings", []))
        return cls(
            passed=passed_flag,
            blocking=blocking_flag,
            min_overall_score=score_floor,
            max_overall_score=score_ceiling,
            required_dimensions=dimensions,
            quality_gate_codes=gate_codes,
            warning_substrings=warning_parts,
        )
@dataclass(frozen=True)
class EvaluationReplayCoverage:
"""Internal coverage labels for one golden replay case."""
age_band: str = "unknown"
content_shape: str = "unknown"
risk_area: str = "unknown"
tags: tuple[str, ...] = field(default_factory=tuple)
@classmethod
def from_payload(cls, payload: dict[str, Any] | None) -> "EvaluationReplayCoverage":
"""Build coverage labels from a JSON-safe payload."""
payload = payload or {}
return cls(
age_band=str(payload.get("age_band", "unknown")),
content_shape=str(payload.get("content_shape", "unknown")),
risk_area=str(payload.get("risk_area", "unknown")),
tags=tuple(str(tag) for tag in payload.get("tags", [])),
)
@dataclass(frozen=True)
class EvaluationReplayCase:
    """A single golden case: evaluator input plus the expected outcome."""

    case_id: str
    artifact: EvaluationReplayArtifact
    # Payload converted into the evaluated object by the payload helpers.
    output_payload: dict[str, Any]
    expected: ExpectedEvaluation
    # Evaluator options passed through unchanged.
    education_theme: str | None = None
    minimum_score: float = 0.7
    description: str = ""
    # Copy of the fixture's "input" object.
    input_payload: dict[str, Any] = field(default_factory=dict)
    coverage: EvaluationReplayCoverage = field(default_factory=EvaluationReplayCoverage)

    @classmethod
    def from_payload(cls, payload: dict[str, Any]) -> "EvaluationReplayCase":
        """Create a replay case from a decoded JSON object.

        ``id``, ``artifact``, ``output`` and ``expected`` are required.
        ``minimum_score`` and ``education_theme`` are read from the ``input``
        object first and from the top level second.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``artifact`` is not a supported artifact value.
        """
        case_input = dict(payload.get("input", {}))
        score_floor = case_input.get("minimum_score", payload.get("minimum_score", 0.7))
        theme = case_input.get("education_theme", payload.get("education_theme"))
        return cls(
            case_id=str(payload["id"]),
            artifact=EvaluationReplayArtifact(payload["artifact"]),
            description=str(payload.get("description", "")),
            input_payload=case_input,
            output_payload=dict(payload["output"]),
            education_theme=theme,
            minimum_score=float(score_floor),
            expected=ExpectedEvaluation.from_payload(payload["expected"]),
            coverage=EvaluationReplayCoverage.from_payload(payload.get("coverage")),
        )

    def evaluate(self) -> EvaluationResult:
        """Evaluate this case's output with the evaluator for its artifact.

        Story cases use ``evaluate_story_output``; every other artifact uses
        ``evaluate_storybook_output``.
        """
        if self.artifact == EvaluationReplayArtifact.STORY:
            evaluator = evaluate_story_output
            subject = _story_output_from_payload(self.output_payload)
        else:
            evaluator = evaluate_storybook_output
            subject = _storybook_from_payload(self.output_payload)
        return evaluator(
            subject,
            education_theme=self.education_theme,
            minimum_score=self.minimum_score,
        )

    def replay(self) -> "EvaluationReplayCaseResult":
        """Evaluate the case and record every mismatch with its expectations."""
        evaluation = self.evaluate()
        mismatches = tuple(_compare_evaluation(self, evaluation))
        return EvaluationReplayCaseResult(
            case_id=self.case_id,
            artifact=self.artifact,
            coverage=self.coverage,
            evaluation=evaluation,
            failures=mismatches,
        )
@dataclass(frozen=True)
class EvaluationReplayCaseResult:
    """Outcome of replaying a single golden case."""

    case_id: str
    artifact: EvaluationReplayArtifact
    coverage: EvaluationReplayCoverage
    evaluation: EvaluationResult
    # One message per expectation that was not met; empty on success.
    failures: tuple[str, ...] = field(default_factory=tuple)

    @property
    def expectations_met(self) -> bool:
        """Whether the replay produced no failure messages."""
        return len(self.failures) == 0
@dataclass(frozen=True)
class EvaluationReplaySuiteResult:
    """Aggregated outcome of replaying a collection of golden cases."""

    # Per-case replay results, in the order they were run.
    cases: tuple[EvaluationReplayCaseResult, ...]

    @property
    def passed(self) -> bool:
        """Whether no case reported an expectation mismatch."""
        for case in self.cases:
            if not case.expectations_met:
                return False
        return True

    @property
    def failed_case_ids(self) -> tuple[str, ...]:
        """IDs of the cases whose expectations were not met."""
        mismatched = [
            case.case_id for case in self.cases if not case.expectations_met
        ]
        return tuple(mismatched)

    def failure_report(self) -> str:
        """Render one ``case_id: failure`` line per mismatch, newline-joined."""
        return "\n".join(
            f"{case.case_id}: {failure}"
            for case in self.cases
            for failure in case.failures
        )

    def coverage_summary(self) -> dict[str, dict[str, int]]:
        """Count cases per coverage label, artifact kind, tag and outcome."""
        results = self.cases
        artifact_counts = _count_values([item.artifact.value for item in results])
        age_band_counts = _count_values([item.coverage.age_band for item in results])
        shape_counts = _count_values(
            [item.coverage.content_shape for item in results]
        )
        risk_counts = _count_values([item.coverage.risk_area for item in results])
        tag_counts = _count_values(
            [tag for item in results for tag in item.coverage.tags]
        )
        # "outcome" reflects the evaluator verdict, not whether expectations matched.
        outcome_counts = _count_values(
            ["passed" if item.evaluation.passed else "blocked" for item in results]
        )
        return {
            "artifact": artifact_counts,
            "age_band": age_band_counts,
            "content_shape": shape_counts,
            "risk_area": risk_counts,
            "tags": tag_counts,
            "outcome": outcome_counts,
        }
def load_evaluation_replay_cases(path: str | Path) -> tuple[EvaluationReplayCase, ...]:
    """Read a UTF-8 JSON fixture and build one replay case per array entry.

    Raises:
        ValueError: If the decoded JSON document is not an array.
    """
    fixture_text = Path(path).read_text(encoding="utf-8")
    decoded = json.loads(fixture_text)
    if isinstance(decoded, list):
        return tuple([EvaluationReplayCase.from_payload(entry) for entry in decoded])
    raise ValueError("Evaluation replay fixture must be a JSON array.")
def run_evaluation_replay_cases(
    cases: Iterable[EvaluationReplayCase],
) -> EvaluationReplaySuiteResult:
    """Replay each case in iteration order and bundle the results into a suite."""
    replayed = [case.replay() for case in cases]
    return EvaluationReplaySuiteResult(cases=tuple(replayed))
def replay_evaluation_golden_cases(path: str | Path) -> EvaluationReplaySuiteResult:
    """Load the golden cases stored at ``path`` and replay all of them."""
    golden_cases = load_evaluation_replay_cases(path)
    return run_evaluation_replay_cases(golden_cases)
def _story_output_from_payload(payload: dict[str, Any]) -> StoryOutput:
    """Build a ``StoryOutput`` from a fixture mapping, defaulting absent keys."""
    # (field name, value used when the key is absent from the payload)
    field_defaults = (
        ("mode", "generated"),
        ("title", ""),
        ("story_text", ""),
        ("cover_prompt_suggestion", ""),
    )
    values = {name: payload.get(name, fallback) for name, fallback in field_defaults}
    return StoryOutput(**values)
def _storybook_from_payload(payload: dict[str, Any]) -> Storybook:
    """Build a ``Storybook`` from a fixture mapping, defaulting absent keys.

    A page without ``page_number`` receives its 1-based position in the list.
    """
    pages = []
    for position, raw_page in enumerate(payload.get("pages", []), start=1):
        pages.append(
            StorybookPage(
                page_number=raw_page.get("page_number", position),
                text=raw_page.get("text", ""),
                image_prompt=raw_page.get("image_prompt", ""),
                image_url=raw_page.get("image_url"),
            )
        )
    return Storybook(
        title=payload.get("title", ""),
        main_character=payload.get("main_character", ""),
        art_style=payload.get("art_style", ""),
        pages=pages,
        cover_prompt=payload.get("cover_prompt", ""),
        cover_url=payload.get("cover_url"),
    )
def _count_values(values: Iterable[str]) -> dict[str, int]:
counts = Counter(value for value in values if value)
return dict(sorted(counts.items(), key=lambda item: (-item[1], item[0])))
def _compare_evaluation(
    case: EvaluationReplayCase,
    evaluation: EvaluationResult,
) -> list[str]:
    """List every way ``evaluation`` deviates from ``case.expected``.

    Checks, in order: the pass flag, the blocking flag, the optional overall
    score bounds, the required dimensions, the exact sequence of quality-gate
    issue codes, and the expected warning substrings. An empty list means the
    evaluation matched all expectations.
    """
    expected = case.expected
    problems: list[str] = []

    if expected.passed != evaluation.passed:
        problems.append(f"expected passed={expected.passed}, got {evaluation.passed}")
    if expected.blocking != evaluation.blocking:
        problems.append(
            f"expected blocking={expected.blocking}, got {evaluation.blocking}"
        )

    lower_bound = expected.min_overall_score
    if lower_bound is not None and evaluation.overall_score < lower_bound:
        problems.append(
            f"expected overall_score >= {lower_bound}, got {evaluation.overall_score}"
        )
    upper_bound = expected.max_overall_score
    if upper_bound is not None and evaluation.overall_score > upper_bound:
        problems.append(
            f"expected overall_score <= {upper_bound}, got {evaluation.overall_score}"
        )

    scored_dimensions = {entry.dimension for entry in evaluation.scores}
    absent: list[str] = []
    for dimension in expected.required_dimensions:
        if dimension not in scored_dimensions:
            absent.append(dimension.value)
    if absent:
        problems.append(f"missing dimensions: {', '.join(absent)}")

    # Codes are compared as ordered tuples, so order and multiplicity matter.
    gate_error = evaluation.gate_error
    if gate_error is None:
        actual_codes = ()
    else:
        actual_codes = tuple(issue.code.value for issue in gate_error.issues)
    if actual_codes != expected.quality_gate_codes:
        problems.append(
            f"expected quality_gate_codes={list(expected.quality_gate_codes)}, "
            f"got {list(actual_codes)}"
        )

    for fragment in expected.warning_substrings:
        found = False
        for warning in evaluation.warnings:
            if fragment in warning:
                found = True
                break
        if not found:
            problems.append(f"missing warning containing: {fragment}")
    return problems

View File

@@ -0,0 +1,267 @@
"""Deterministic evaluation helpers for generated child-facing content."""
from dataclasses import dataclass, field
from enum import StrEnum
from typing import Any
from app.services.adapters.storybook.primary import Storybook
from app.services.adapters.text.models import StoryOutput
from app.services.harness.quality_gates import (
QualityGateError,
validate_story_output,
validate_storybook_output,
)
class EvaluationDimension(StrEnum):
"""Stable dimensions used by harness evaluations."""
STRUCTURE = "structure"
SAFETY = "safety"
AGE_FIT = "age_fit"
EDUCATIONAL_VALUE = "educational_value"
READABILITY = "readability"
@dataclass(frozen=True)
class EvaluationScore:
    """Score and rationale for a single evaluation dimension."""

    # Dimension this score belongs to.
    dimension: EvaluationDimension
    # Numeric score assigned to the dimension.
    score: float
    # Human-readable explanation of the score.
    reason: str

    def to_metadata(self) -> dict[str, Any]:
        """Serialize the score into a plain mapping, using the enum's value."""
        return dict(
            dimension=self.dimension.value,
            score=self.score,
            reason=self.reason,
        )
@dataclass(frozen=True)
class EvaluationResult:
    """Outcome of deterministically evaluating one generated artifact."""

    # Aggregate score across the evaluated dimensions.
    overall_score: float
    # Whether the artifact met the minimum score.
    passed: bool
    # Whether the artifact must not proceed.
    blocking: bool
    # Per-dimension scores.
    scores: tuple[EvaluationScore, ...]
    # Quality-gate failure that blocked the artifact, when there was one.
    gate_error: QualityGateError | None = None
    # Non-blocking notes raised during evaluation.
    warnings: tuple[str, ...] = field(default_factory=tuple)

    def to_metadata(self) -> dict[str, Any]:
        """Serialize the result; ``quality_gate`` is added only on gate failure."""
        serialized_scores = [entry.to_metadata() for entry in self.scores]
        metadata: dict[str, Any] = {
            "overall_score": self.overall_score,
            "passed": self.passed,
            "blocking": self.blocking,
            "scores": serialized_scores,
            "warnings": list(self.warnings),
        }
        gate_error = self.gate_error
        if gate_error is None:
            return metadata
        metadata["quality_gate"] = gate_error.to_metadata()
        return metadata
def _clamp_score(value: float) -> float:
return max(0.0, min(1.0, round(value, 2)))
def _story_text_readability_score(story_text: str) -> float:
"""Score text length with a conservative 3-8 age readability heuristic."""
normalized_length = len(story_text.strip())
if normalized_length < 30:
return 0.45
if normalized_length > 2500:
return 0.72
if normalized_length > 1800:
return 0.84
return 0.96
def _educational_value_score(story_text: str, education_theme: str | None) -> float:
if not education_theme:
return 0.82
return 0.96 if education_theme.strip() in story_text else 0.88
def _storybook_readability_score(page_texts: list[str]) -> float:
if not page_texts:
return 0.0
page_lengths = [len(text.strip()) for text in page_texts]
if any(length < 8 for length in page_lengths):
return 0.62
if any(length > 320 for length in page_lengths):
return 0.78
if any(length > 220 for length in page_lengths):
return 0.88
return 0.96
def _storybook_educational_value_score(
page_texts: list[str],
education_theme: str | None,
) -> float:
if not education_theme:
return 0.82
combined_text = " ".join(page_texts)
return 0.96 if education_theme.strip() in combined_text else 0.88
def evaluate_story_output(
    output: StoryOutput,
    *,
    education_theme: str | None = None,
    minimum_score: float = 0.7,
) -> EvaluationResult:
    """Score a generated text story across the harness evaluation dimensions.

    The story is first run through ``validate_story_output``. If that raises
    ``QualityGateError`` the result is blocking, carries the error, and scores
    structure and safety as 0.0. Otherwise five dimensions are scored, the
    overall score is their mean passed through ``_clamp_score``, and the
    result passes when that score reaches ``minimum_score``.
    """
    try:
        validate_story_output(output)
    except QualityGateError as gate_error:
        blocked_reasons = (
            (EvaluationDimension.STRUCTURE, "故事结构未通过质量门。"),
            (EvaluationDimension.SAFETY, "内容未通过儿童安全或结构完整性检查。"),
        )
        return EvaluationResult(
            overall_score=0.0,
            passed=False,
            blocking=True,
            scores=tuple(
                EvaluationScore(dimension=dimension, score=0.0, reason=reason)
                for dimension, reason in blocked_reasons
            ),
            gate_error=gate_error,
        )

    length_score = _story_text_readability_score(output.story_text)
    theme_score = _educational_value_score(output.story_text, education_theme)

    # The length-based score feeds both the age-fit and readability dimensions.
    dimension_rows = (
        (EvaluationDimension.STRUCTURE, 1.0, "标题、正文和封面提示词完整。"),
        (EvaluationDimension.SAFETY, 1.0, "未命中确定性儿童安全风险词。"),
        (
            EvaluationDimension.AGE_FIT,
            length_score,
            "根据正文长度估算低龄儿童阅读适配度。",
        ),
        (
            EvaluationDimension.EDUCATIONAL_VALUE,
            theme_score,
            "根据教育主题是否清晰融入正文估算。",
        ),
        (
            EvaluationDimension.READABILITY,
            length_score,
            "根据正文长度估算朗读和亲子共读流畅度。",
        ),
    )
    scores = tuple(
        EvaluationScore(dimension=dimension, score=value, reason=reason)
        for dimension, value, reason in dimension_rows
    )

    if length_score < 0.8:
        warnings = ("故事正文长度可能不适合 3-8 岁儿童的完整阅读体验。",)
    else:
        warnings = ()

    overall_score = _clamp_score(sum(entry.score for entry in scores) / len(scores))
    return EvaluationResult(
        overall_score=overall_score,
        passed=overall_score >= minimum_score,
        blocking=overall_score < minimum_score,
        scores=scores,
        warnings=warnings,
    )
def evaluate_storybook_output(
    output: Storybook,
    *,
    education_theme: str | None = None,
    minimum_score: float = 0.7,
) -> EvaluationResult:
    """Score a generated storybook across the harness evaluation dimensions.

    The storybook is first run through ``validate_storybook_output``. If that
    raises ``QualityGateError`` the result is blocking, carries the error, and
    scores structure and safety as 0.0. Otherwise five dimensions are scored
    from the page texts, the overall score is their mean passed through
    ``_clamp_score``, and the result passes when that score reaches
    ``minimum_score``.
    """
    try:
        validate_storybook_output(output)
    except QualityGateError as gate_error:
        blocked_reasons = (
            (EvaluationDimension.STRUCTURE, "绘本结构未通过质量门。"),
            (
                EvaluationDimension.SAFETY,
                "绘本内容未通过儿童安全或结构完整性检查。",
            ),
        )
        return EvaluationResult(
            overall_score=0.0,
            passed=False,
            blocking=True,
            scores=tuple(
                EvaluationScore(dimension=dimension, score=0.0, reason=reason)
                for dimension, reason in blocked_reasons
            ),
            gate_error=gate_error,
        )

    page_texts = [page.text for page in output.pages]
    length_score = _storybook_readability_score(page_texts)
    theme_score = _storybook_educational_value_score(page_texts, education_theme)

    # The length-based score feeds both the age-fit and readability dimensions.
    dimension_rows = (
        (EvaluationDimension.STRUCTURE, 1.0, "绘本标题、分页和页码结构完整。"),
        (EvaluationDimension.SAFETY, 1.0, "未命中确定性儿童安全风险词。"),
        (
            EvaluationDimension.AGE_FIT,
            length_score,
            "根据每页正文长度估算低龄儿童翻页阅读适配度。",
        ),
        (
            EvaluationDimension.EDUCATIONAL_VALUE,
            theme_score,
            "根据教育主题是否清晰融入分页正文估算。",
        ),
        (
            EvaluationDimension.READABILITY,
            length_score,
            "根据分页正文长度估算亲子共读流畅度。",
        ),
    )
    scores = tuple(
        EvaluationScore(dimension=dimension, score=value, reason=reason)
        for dimension, value, reason in dimension_rows
    )

    if length_score < 0.8:
        warnings = ("绘本分页正文长度可能不适合 3-8 岁儿童的翻页阅读体验。",)
    else:
        warnings = ()

    overall_score = _clamp_score(sum(entry.score for entry in scores) / len(scores))
    return EvaluationResult(
        overall_score=overall_score,
        passed=overall_score >= minimum_score,
        blocking=overall_score < minimum_score,
        scores=scores,
        warnings=warnings,
    )

View File

@@ -0,0 +1,150 @@
"""Small-step workflow executor helpers for generation harness adoption."""
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from sqlalchemy.ext.asyncio import AsyncSession
from app.services.harness.artifacts import AssetCompletionResult
from app.services.harness.plans import WorkflowPlan
from app.services.harness.trace import TraceRecorder
from app.services.harness.types import ArtifactKind, WorkflowStep
if TYPE_CHECKING:
from app.db.models import GenerationJob
AssetTask = Callable[[], Awaitable[AssetCompletionResult]]
@dataclass(frozen=True)
class AssetPlanRunResult:
    """What happened when the asset tasks of one workflow plan were run."""

    # Values returned by the task handlers that ran, in execution order.
    task_results: tuple[AssetCompletionResult, ...]
    # Keys of the plan tasks for which a handler was invoked.
    executed_task_keys: tuple[str, ...]
    # Keys of the plan tasks the executor passed over.
    ignored_task_keys: tuple[str, ...]

    @property
    def result_assets(self) -> tuple[str, ...]:
        """The ``asset`` attribute of every task result, in execution order."""
        assets = [completed.asset for completed in self.task_results]
        return tuple(assets)

    def to_metadata(self, plan: WorkflowPlan) -> dict[str, Any]:
        """Summarize the run against ``plan`` as a mapping of counts and lists."""
        executed = list(self.executed_task_keys)
        ignored = list(self.ignored_task_keys)
        return {
            "plan_mode": plan.mode.value,
            "planned_task_count": len(plan.tasks),
            "executed_task_count": len(executed),
            "ignored_task_count": len(ignored),
            "result_count": len(self.task_results),
            "executed_task_keys": executed,
            "ignored_task_keys": ignored,
            "result_assets": list(self.result_assets),
        }
async def record_workflow_plan(
    db: AsyncSession,
    *,
    job: "GenerationJob | None",
    plan: WorkflowPlan,
) -> None:
    """Record a ``workflow_planned`` trace step carrying the plan's snapshot."""
    recorder = TraceRecorder(db)
    snapshot_metadata = {"plan": plan.to_snapshot()}
    await recorder.record_step(
        job=job,
        event_type="workflow_planned",
        status="succeeded",
        message="Workflow plan selected for this generation request.",
        metadata=snapshot_metadata,
        step=WorkflowStep.REQUEST_ACCEPTANCE,
        artifact=ArtifactKind.NONE,
        blocks_main_result=True,
    )
async def record_evaluation_result(
    db: AsyncSession,
    *,
    job: "GenerationJob | None",
    story_id: int | None = None,
    metadata: dict[str, Any],
    status: str,
    artifact: ArtifactKind | str = ArtifactKind.STORY_TEXT,
) -> None:
    """Record an ``evaluation_completed`` trace step with the given metadata.

    The step is flagged as blocking the main result whenever ``status`` is
    anything other than ``"succeeded"``.
    """
    blocks_main_result = status != "succeeded"
    recorder = TraceRecorder(db)
    await recorder.record_step(
        job=job,
        story_id=story_id,
        event_type="evaluation_completed",
        status=status,
        message="Generated content evaluation completed.",
        metadata=metadata,
        step=WorkflowStep.EVALUATION,
        artifact=artifact,
        blocks_main_result=blocks_main_result,
    )
async def record_executor_result(
    db: AsyncSession,
    *,
    job: "GenerationJob | None",
    plan: WorkflowPlan,
    result: AssetPlanRunResult,
) -> None:
    """Record an ``executor_completed`` trace step with the run's coverage data."""
    coverage_metadata = result.to_metadata(plan)
    recorder = TraceRecorder(db)
    await recorder.record_step(
        job=job,
        event_type="executor_completed",
        status="succeeded",
        message="Workflow executor completed planned asset tasks.",
        metadata=coverage_metadata,
        step=WorkflowStep.UNKNOWN,
        artifact=ArtifactKind.NONE,
        blocks_main_result=False,
    )
async def run_asset_plan(
    plan: WorkflowPlan,
    *,
    image_task: AssetTask | None = None,
    audio_task: AssetTask | None = None,
) -> AssetPlanRunResult:
    """Run the image and audio handlers for a plan's tasks, in plan order.

    Tasks keyed ``complete_image_asset`` and ``complete_audio_asset`` are
    dispatched to ``image_task`` and ``audio_task``; every other task key is
    recorded as ignored.

    Raises:
        ValueError: If the plan mode is not an asset mode, or if the plan
            contains an image or audio task whose handler was not supplied.
    """
    if plan.mode.value not in {"asset_generation", "asset_retry"}:
        raise ValueError("run_asset_plan only supports asset workflow plans")

    # task key -> (handler, error raised when that handler is missing)
    handlers = {
        "complete_image_asset": (
            image_task,
            "Asset workflow plan requires an image task handler",
        ),
        "complete_audio_asset": (
            audio_task,
            "Asset workflow plan requires an audio task handler",
        ),
    }

    completed: list[AssetCompletionResult] = []
    executed: list[str] = []
    ignored: list[str] = []
    for task in plan.tasks:
        entry = handlers.get(task.key)
        if entry is None:
            ignored.append(task.key)
            continue
        handler, missing_message = entry
        if handler is None:
            raise ValueError(missing_message)
        completed.append(await handler())
        executed.append(task.key)

    return AssetPlanRunResult(
        task_results=tuple(completed),
        executed_task_keys=tuple(executed),
        ignored_task_keys=tuple(ignored),
    )

View File

@@ -0,0 +1,400 @@
[
{
"id": "story-safe-theme-pass",
"artifact": "story",
"description": "完整、儿童安全且清晰包含教育主题的普通故事。",
"coverage": {
"age_band": "5-6",
"content_shape": "short_story",
"risk_area": "happy_path",
"tags": ["theme_present", "safe", "story"]
},
"input": {
"keywords": "小兔子, 月光花园",
"education_theme": "复盘"
},
"output": {
"mode": "generated",
"title": "小兔子的月光花园",
"story_text": "小兔子露露在月光花园里照顾一朵会发光的小花。她先给小花浇水,又邀请朋友一起观察花瓣的变化。晚上睡前,露露和朋友们坐在石凳上复盘今天的努力:下次要先分好小水壶,再轮流照顾花朵。大家都觉得,分享和复盘让花园变得更温暖。",
"cover_prompt_suggestion": "A gentle watercolor rabbit in a moonlit garden"
},
"expected": {
"passed": true,
"blocking": false,
"min_overall_score": 0.9,
"required_dimensions": [
"structure",
"safety",
"age_fit",
"educational_value",
"readability"
],
"quality_gate_codes": []
}
},
{
"id": "story-long-safe-pass",
"artifact": "story",
"description": "较长但仍适合亲子共读的普通故事。",
"coverage": {
"age_band": "7-8",
"content_shape": "long_story",
"risk_area": "length_boundary",
"tags": ["theme_present", "long_text", "story"]
},
"input": {
"keywords": "小海豚, 图书馆",
"education_theme": "合作"
},
"output": {
"mode": "generated",
"title": "小海豚的蓝色图书馆",
"story_text": "小海豚多多住在一片安静的海湾里,那里有一座用贝壳和海草搭成的蓝色图书馆。每天傍晚,多多都会把漂来的故事贝壳整理好,放进不同的篮子。可是这一天,风浪把贝壳吹得到处都是,小章鱼、小海马和小螃蟹都赶来帮忙。大家先一起数贝壳,再按颜色排队,最后把每个故事放回合适的位置。多多发现,合作不是一个人做得最快,而是大家把自己的办法放在一起。夜晚来临时,蓝色图书馆重新亮起柔柔的光,小伙伴们围坐在门口,听多多讲今天学到的合作故事。",
"cover_prompt_suggestion": "A gentle dolphin organizing a blue underwater library"
},
"expected": {
"passed": true,
"blocking": false,
"min_overall_score": 0.9,
"required_dimensions": [
"structure",
"safety",
"age_fit",
"educational_value",
"readability"
],
"quality_gate_codes": []
}
},
{
"id": "story-missing-text-blocks",
"artifact": "story",
"description": "故事正文缺失会被确定性质量门阻断。",
"coverage": {
"age_band": "unknown",
"content_shape": "empty_story",
"risk_area": "schema_error",
"tags": ["missing_text", "story", "blocking"]
},
"input": {
"keywords": "小熊, 星星"
},
"output": {
"mode": "generated",
"title": "小熊找星星",
"story_text": "",
"cover_prompt_suggestion": "A bear looking at friendly stars"
},
"expected": {
"passed": false,
"blocking": true,
"max_overall_score": 0.0,
"quality_gate_codes": [
"missing_story_text"
]
}
},
{
"id": "story-missing-cover-prompt-blocks",
"artifact": "story",
"description": "故事正文完整但封面提示词缺失会被结构质量门阻断。",
"coverage": {
"age_band": "5-6",
"content_shape": "short_story",
"risk_area": "schema_error",
"tags": ["missing_cover_prompt", "story", "blocking"]
},
"input": {
"keywords": "小松鼠, 风筝",
"education_theme": "勇敢"
},
"output": {
"mode": "generated",
"title": "小松鼠的风筝",
"story_text": "小松鼠第一次放风筝时有点紧张。朋友们陪它一起数一二三,它鼓起勇敢的心,终于让风筝飞上蓝天。",
"cover_prompt_suggestion": ""
},
"expected": {
"passed": false,
"blocking": true,
"max_overall_score": 0.0,
"quality_gate_codes": [
"missing_cover_prompt"
]
}
},
{
"id": "story-unsafe-term-blocks",
"artifact": "story",
"description": "明显不适合儿童的风险词会被安全质量门阻断。",
"coverage": {
"age_band": "3-4",
"content_shape": "short_story",
"risk_area": "safety_error",
"tags": ["unsafe_term", "story", "blocking"]
},
"input": {
"keywords": "小猫, 城堡"
},
"output": {
"mode": "generated",
"title": "小猫的城堡",
"story_text": "小猫在城堡里看到血腥场景,然后感到很害怕。",
"cover_prompt_suggestion": "A cat near a castle"
},
"expected": {
"passed": false,
"blocking": true,
"max_overall_score": 0.0,
"quality_gate_codes": [
"unsafe_child_content"
]
}
},
{
"id": "story-short-high-threshold-blocks",
"artifact": "story",
"description": "结构合格但阅读体验偏短的故事在高阈值下会被内部评测阻断。",
"coverage": {
"age_band": "3-4",
"content_shape": "very_short_story",
"risk_area": "readability_warning",
"tags": ["short_text", "threshold_block", "story"]
},
"input": {
"keywords": "小鹿, 书签",
"education_theme": "耐心",
"minimum_score": 0.82
},
"output": {
"mode": "generated",
"title": "小鹿的书签",
"story_text": "小鹿学会了耐心等待。",
"cover_prompt_suggestion": "A deer with a golden bookmark"
},
"expected": {
"passed": false,
"blocking": true,
"min_overall_score": 0.7,
"max_overall_score": 0.8,
"required_dimensions": [
"structure",
"safety",
"readability"
],
"quality_gate_codes": [],
"warning_substrings": [
"正文长度"
]
}
},
{
"id": "storybook-safe-theme-pass",
"artifact": "storybook",
"description": "完整、儿童安全且包含教育主题的绘本分页输出。",
"coverage": {
"age_band": "5-6",
"content_shape": "storybook_3_pages",
"risk_area": "happy_path",
"tags": ["theme_present", "safe", "storybook"]
},
"input": {
"keywords": "小狐狸, 彩虹桥",
"education_theme": "合作"
},
"output": {
"title": "彩虹桥上的合作",
"main_character": "小狐狸米米",
"art_style": "温暖水彩",
"cover_prompt": "A warm watercolor fox near a rainbow bridge",
"pages": [
{
"page_number": 1,
"text": "小狐狸米米在雨后的森林里发现一座亮晶晶的彩虹桥。",
"image_prompt": "A little fox finds a rainbow bridge"
},
{
"page_number": 2,
"text": "桥边的小伙伴们一起商量办法,决定合作把落叶清理干净。",
"image_prompt": "Forest friends work together"
},
{
"page_number": 3,
"text": "大家轮流搬叶子、扶篮子,还互相说谢谢,彩虹桥终于露出笑脸。",
"image_prompt": "Friends carrying leaves together"
}
]
},
"expected": {
"passed": true,
"blocking": false,
"min_overall_score": 0.9,
"required_dimensions": [
"structure",
"safety",
"age_fit",
"educational_value",
"readability"
],
"quality_gate_codes": []
}
},
{
"id": "storybook-duplicate-page-blocks",
"artifact": "storybook",
"description": "重复页码的绘本结构会被质量门阻断。",
"coverage": {
"age_band": "5-6",
"content_shape": "storybook_invalid_pages",
"risk_area": "schema_error",
"tags": ["duplicate_page", "storybook", "blocking"]
},
"input": {
"keywords": "小熊, 森林"
},
"output": {
"title": "森林里的小熊",
"main_character": "小熊布布",
"art_style": "水彩",
"cover_prompt": "A bear in a forest",
"pages": [
{
"page_number": 1,
"text": "布布在森林里找到一颗松果。",
"image_prompt": "Bear finds a pinecone"
},
{
"page_number": 1,
"text": "布布把松果带给朋友一起观察。",
"image_prompt": "Bear shares the pinecone"
}
]
},
"expected": {
"passed": false,
"blocking": true,
"max_overall_score": 0.0,
"quality_gate_codes": [
"invalid_storybook_page_number"
]
}
},
{
"id": "storybook-missing-page-blocks",
"artifact": "storybook",
"description": "没有分页内容的绘本会被结构质量门阻断。",
"coverage": {
"age_band": "unknown",
"content_shape": "storybook_empty_pages",
"risk_area": "schema_error",
"tags": ["missing_page", "storybook", "blocking"]
},
"input": {
"keywords": "小鸟, 云朵"
},
"output": {
"title": "小鸟和云朵",
"main_character": "小鸟啾啾",
"art_style": "柔和水彩",
"cover_prompt": "A bird near soft clouds",
"pages": []
},
"expected": {
"passed": false,
"blocking": true,
"max_overall_score": 0.0,
"quality_gate_codes": [
"missing_storybook_page"
]
}
},
{
"id": "storybook-unsafe-term-blocks",
"artifact": "storybook",
"description": "绘本分页文字包含明显不适龄风险词时会被安全质量门阻断。",
"coverage": {
"age_band": "3-4",
"content_shape": "storybook_2_pages",
"risk_area": "safety_error",
"tags": ["unsafe_term", "storybook", "blocking"]
},
"input": {
"keywords": "小兔子, 山洞"
},
"output": {
"title": "山洞里的声音",
"main_character": "小兔子米粒",
"art_style": "温暖水彩",
"cover_prompt": "A rabbit near a cave",
"pages": [
{
"page_number": 1,
"text": "米粒走到山洞边,听见奇怪的声音。",
"image_prompt": "Rabbit near a cave"
},
{
"page_number": 2,
"text": "洞里出现血腥画面,米粒吓得跑开。",
"image_prompt": "Rabbit running away"
}
]
},
"expected": {
"passed": false,
"blocking": true,
"max_overall_score": 0.0,
"quality_gate_codes": [
"unsafe_child_content"
]
}
},
{
"id": "storybook-short-page-warning",
"artifact": "storybook",
"description": "分页正文过短时保留内部警告,用于评测回归。",
"coverage": {
"age_band": "3-4",
"content_shape": "storybook_2_pages",
"risk_area": "readability_warning",
"tags": ["short_page_text", "threshold_block", "storybook"]
},
"input": {
"keywords": "小羊, 风铃",
"minimum_score": 0.85
},
"output": {
"title": "风铃响了",
"main_character": "小羊团团",
"art_style": "柔和蜡笔",
"cover_prompt": "A lamb listening to a wind chime",
"pages": [
{
"page_number": 1,
"text": "风响。",
"image_prompt": "Wind chime rings"
},
{
"page_number": 2,
"text": "团团笑。",
"image_prompt": "Lamb smiles"
}
]
},
"expected": {
"passed": false,
"blocking": true,
"min_overall_score": 0.8,
"max_overall_score": 0.82,
"required_dimensions": [
"structure",
"safety",
"readability"
],
"quality_gate_codes": [],
"warning_substrings": [
"分页正文长度"
]
}
}
]

View File

@@ -69,6 +69,11 @@ def build_story_plan(*, generate_images: bool) -> WorkflowPlan:
step=WorkflowStep.NARRATIVE_GENERATION,
artifact=ArtifactKind.STORY_TEXT,
),
WorkflowTask(
key="evaluate_narrative",
step=WorkflowStep.EVALUATION,
artifact=ArtifactKind.STORY_TEXT,
),
WorkflowTask(
key="persist_story",
step=WorkflowStep.STORY_PERSISTENCE,
@@ -124,6 +129,11 @@ def build_storybook_plan(*, generate_images: bool) -> WorkflowPlan:
step=WorkflowStep.NARRATIVE_GENERATION,
artifact=ArtifactKind.STORYBOOK_PAGES,
),
WorkflowTask(
key="evaluate_storybook_pages",
step=WorkflowStep.EVALUATION,
artifact=ArtifactKind.STORYBOOK_PAGES,
),
]
if generate_images:

View File

@@ -11,6 +11,7 @@ class WorkflowStep(StrEnum):
WORKER_START = "worker_start"
CONTEXT_PREPARATION = "context_preparation"
NARRATIVE_GENERATION = "narrative_generation"
EVALUATION = "evaluation"
STORY_PERSISTENCE = "story_persistence"
PROVIDER_INVOCATION = "provider_invocation"
IMAGE_GENERATION = "image_generation"
@@ -64,6 +65,8 @@ class StepStatus(StrEnum):
EVENT_STEP_MAP: dict[str, WorkflowStep] = {
"request_accepted": WorkflowStep.REQUEST_ACCEPTANCE,
"workflow_planned": WorkflowStep.REQUEST_ACCEPTANCE,
"executor_completed": WorkflowStep.UNKNOWN,
"retry_queued": WorkflowStep.REQUEST_ACCEPTANCE,
"worker_started": WorkflowStep.WORKER_START,
"context_prepared": WorkflowStep.CONTEXT_PREPARATION,
@@ -73,6 +76,7 @@ EVENT_STEP_MAP: dict[str, WorkflowStep] = {
"provider_call_succeeded": WorkflowStep.PROVIDER_INVOCATION,
"provider_call_failed": WorkflowStep.PROVIDER_INVOCATION,
"quality_gate_failed": WorkflowStep.NARRATIVE_GENERATION,
"evaluation_completed": WorkflowStep.EVALUATION,
"cover_image_started": WorkflowStep.IMAGE_GENERATION,
"cover_image_succeeded": WorkflowStep.IMAGE_GENERATION,
"cover_image_failed": WorkflowStep.IMAGE_GENERATION,
@@ -100,6 +104,7 @@ EVENT_STEP_MAP: dict[str, WorkflowStep] = {
EVENT_ARTIFACT_MAP: dict[str, ArtifactKind] = {
"narrative_generated": ArtifactKind.STORY_TEXT,
"quality_gate_failed": ArtifactKind.STORY_TEXT,
"evaluation_completed": ArtifactKind.STORY_TEXT,
"cover_image_started": ArtifactKind.COVER_IMAGE,
"cover_image_succeeded": ArtifactKind.COVER_IMAGE,
"cover_image_failed": ArtifactKind.COVER_IMAGE,

View File

@@ -36,8 +36,8 @@ from app.services.generation_jobs import (
ensure_no_active_story_generation_job,
finish_generation_job,
generation_job_can_retry,
generation_job_to_summary,
get_generation_job_for_user,
public_generation_job_to_summary,
record_generation_event,
)
from app.services.harness.artifacts import (
@@ -57,12 +57,27 @@ from app.services.harness.control import (
ExecutionControl,
GenerationJobCanceledError,
)
from app.services.harness.evaluators import (
EvaluationResult,
evaluate_story_output,
evaluate_storybook_output,
)
from app.services.harness.executor import (
record_evaluation_result,
record_executor_result,
record_workflow_plan,
run_asset_plan,
)
from app.services.harness.plans import (
build_asset_plan,
build_story_plan,
build_storybook_plan,
)
from app.services.harness.quality_gates import (
QualityGateError,
validate_story_output,
validate_storybook_output,
)
from app.services.harness.trace import TraceRecorder
from app.services.harness.types import ArtifactKind
from app.services.memory_service import build_enhanced_memory_context
from app.services.provider_router import (
generate_image,
@@ -129,6 +144,24 @@ async def _record_quality_gate_failure_if_present(
)
async def _record_evaluation_result_if_present(
    db: AsyncSession,
    *,
    job,
    evaluation: EvaluationResult,
    artifact: ArtifactKind | str = ArtifactKind.STORY_TEXT,
) -> None:
    """Record the evaluation outcome as a trace event for ``job``.

    The event status is ``"succeeded"`` when the evaluation passed and
    ``"failed"`` otherwise; the evaluation's metadata is stored with it.
    """
    event_metadata = evaluation.to_metadata()
    if evaluation.passed:
        event_status = "succeeded"
    else:
        event_status = "failed"
    await record_evaluation_result(
        db,
        job=job,
        metadata=event_metadata,
        status=event_status,
        artifact=artifact,
    )
def _asset_result_metadata(result: AssetCompletionResult) -> dict:
"""Build JSON-safe metadata for asset workflow events."""
@@ -643,18 +676,33 @@ async def generate_and_save_story(
user_id=user_id,
generation_job=job,
)
validate_story_output(result)
except QualityGateError as exc:
await _record_quality_gate_failure_if_present(db, job=job, error=exc)
raise HTTPException(
status_code=502,
detail="Story generation failed quality checks, please try again.",
) from exc
except Exception as exc:
raise HTTPException(
status_code=502,
detail="Story generation failed, please try again.",
) from exc
evaluation = evaluate_story_output(
result,
education_theme=request.education_theme,
)
if evaluation.gate_error is not None:
await _record_quality_gate_failure_if_present(
db,
job=job,
error=evaluation.gate_error,
)
await _record_evaluation_result_if_present(
db,
job=job,
evaluation=evaluation,
)
if evaluation.blocking:
raise HTTPException(
status_code=502,
detail="Story generation failed quality checks, please try again.",
)
await _record_job_event_if_present(
db,
job=job,
@@ -758,13 +806,32 @@ async def generate_storybook_service(
user_id=user_id,
generation_job=job,
)
validate_storybook_output(storybook)
except QualityGateError as exc:
await _record_quality_gate_failure_if_present(db, job=job, error=exc)
raise HTTPException(status_code=500, detail=f"故事书质量检查失败: {exc}") from exc
except Exception as e:
logger.error("storybook_generation_failed", error=str(e))
raise HTTPException(status_code=500, detail=f"故事书生成失败: {e}")
evaluation = evaluate_storybook_output(
storybook,
education_theme=request.education_theme,
)
if evaluation.gate_error is not None:
await _record_quality_gate_failure_if_present(
db,
job=job,
error=evaluation.gate_error,
)
await _record_evaluation_result_if_present(
db,
job=job,
evaluation=evaluation,
artifact=ArtifactKind.STORYBOOK_PAGES,
)
if evaluation.blocking:
raise HTTPException(
status_code=500,
detail=f"故事书质量检查失败: {evaluation.gate_error or 'evaluation blocked'}",
)
await _record_job_event_if_present(
db,
job=job,
@@ -1025,28 +1092,50 @@ async def _generate_asset_generation_service_with_job(
if not requested_assets:
raise HTTPException(status_code=400, detail="资源任务缺少 assets。")
plan = build_asset_plan(
output_mode="asset_generation",
assets=requested_assets,
)
await record_workflow_plan(
db,
job=job,
plan=plan,
)
story = await get_story_detail(int(story_id), job.user_id, db)
if "image" in requested_assets:
async def complete_image() -> AssetCompletionResult:
if story.mode == "storybook":
await _complete_storybook_image_assets(story, db, job=job)
else:
await _complete_cover_image_asset(
story,
db,
raise_on_failure=True,
log_event="cover_generation_failed",
job=job,
)
return await _complete_storybook_image_assets(story, db, job=job)
if "audio" in requested_assets:
await _complete_audio_asset(
return await _complete_cover_image_asset(
story,
db,
raise_on_failure=True,
log_event="cover_generation_failed",
job=job,
)
async def complete_audio() -> AssetCompletionResult:
return await _complete_audio_asset(
story,
db,
raise_on_failure=True,
job=job,
)
asset_plan_result = await run_asset_plan(
plan,
image_task=complete_image if "image" in requested_assets else None,
audio_task=complete_audio if "audio" in requested_assets else None,
)
await record_executor_result(
db,
job=job,
plan=plan,
result=asset_plan_result,
)
story = await get_story_detail(story.id, job.user_id, db)
await finish_generation_job(
db,
@@ -1096,7 +1185,7 @@ async def retry_generation_job_service(
)
await _dispatch_generation_job(db, job=retry_job)
await db.refresh(retry_job)
return generation_job_to_summary(retry_job)
return public_generation_job_to_summary(retry_job)
async def _generate_generation_service_with_job(
@@ -1109,6 +1198,11 @@ async def _generate_generation_service_with_job(
"""Run the unified generation workflow after the tracking job has been created."""
if request.output_mode == "storybook":
await record_workflow_plan(
db,
job=job,
plan=build_storybook_plan(generate_images=request.generate_images),
)
storybook = await generate_storybook_service(
StorybookRequest(
keywords=request.data,
@@ -1155,6 +1249,9 @@ async def _generate_generation_service_with_job(
retryable_assets=saved_story.retryable_assets,
)
if request.output_mode == "story" and not request.generate_images:
return await _execute_story_without_assets_plan(request, user_id, db, job=job)
generate_request = GenerateRequest(
type=request.type,
data=request.data,
@@ -1164,6 +1261,11 @@ async def _generate_generation_service_with_job(
)
if request.generate_images:
await record_workflow_plan(
db,
job=job,
plan=build_story_plan(generate_images=True),
)
story = await generate_full_story_service(generate_request, user_id, db, job=job)
saved_story = await get_story_detail(story.id, user_id, db)
await _record_postprocessing_event_if_needed(db, job=job, story=saved_story)
@@ -1222,6 +1324,54 @@ async def _generate_generation_service_with_job(
universe_id=story.universe_id,
retryable_assets=story.retryable_assets,
)
async def _execute_story_without_assets_plan(
    request: GenerationRequest,
    user_id: str,
    db: AsyncSession,
    *,
    job,
) -> GenerationResponse:
    """Run the text-only story workflow under an explicitly recorded plan.

    Steps, in order:

    1. Build the story plan with ``generate_images=False`` and record it
       against ``job`` before any generation call is made.
    2. Call ``generate_and_save_story`` with a ``GenerateRequest`` built from
       the incoming ``request``.
    3. Hand the story to ``_record_postprocessing_event_if_needed``.
    4. Mark the job finished at the ``generation_completed`` step.
    5. Copy the story's fields into a ``GenerationResponse``.

    Args:
        request: Unified generation request; only ``type``, ``data``,
            ``education_theme``, ``child_profile_id`` and ``universe_id``
            are forwarded to the story generator.
        user_id: Passed through to ``generate_and_save_story``.
        db: Async database session shared by every step.
        job: Tracking job the plan, events and completion are recorded on.

    Returns:
        The response describing the story returned by
        ``generate_and_save_story``, tagged with ``job.id``.
    """
    text_only_plan = build_story_plan(generate_images=False)
    await record_workflow_plan(db, job=job, plan=text_only_plan)

    # The request object is built inline; arguments are still evaluated
    # left-to-right, so it is constructed before the generator is invoked.
    story = await generate_and_save_story(
        GenerateRequest(
            type=request.type,
            data=request.data,
            education_theme=request.education_theme,
            child_profile_id=request.child_profile_id,
            universe_id=request.universe_id,
        ),
        user_id,
        db,
        job=job,
    )

    await _record_postprocessing_event_if_needed(db, job=job, story=story)
    await finish_generation_job(
        db,
        job=job,
        story=story,
        current_step="generation_completed",
        message="Story generation completed with a persisted readable narrative.",
    )

    return GenerationResponse(
        # Identity
        id=story.id,
        generation_job_id=job.id,
        # Content
        title=story.title,
        mode=story.mode,
        story_text=story.story_text,
        cover_prompt=story.cover_prompt,
        # cover_url deliberately mirrors image_url: both read story.image_url.
        image_url=story.image_url,
        cover_url=story.image_url,
        # Per-asset status
        generation_status=story.generation_status,
        text_status=story.text_status,
        image_status=story.image_status,
        audio_status=story.audio_status,
        last_error=story.last_error,
        # Ownership / context
        child_profile_id=story.child_profile_id,
        universe_id=story.universe_id,
        retryable_assets=story.retryable_assets,
    )
async def list_stories(
@@ -1321,36 +1471,7 @@ async def queue_story_asset_generation(
)
await _dispatch_generation_job(db, job=job)
await db.refresh(job)
return generation_job_to_summary(job)
async def _retry_cover_image_asset(
    story: Story,
    db: AsyncSession,
    *,
    job=None,
) -> None:
    """Retry the cover image for a text story.

    Delegates to ``_complete_cover_image_asset`` with the retry-specific
    error prefix and log event name; whatever that call returns is discarded.
    """
    retry_options = {
        "last_error_prefix": "封面生成失败",
        "log_event": "cover_asset_retry_failed",
        "job": job,
    }
    await _complete_cover_image_asset(story, db, **retry_options)
async def _retry_storybook_image_assets(story: Story, db: AsyncSession, *, job=None) -> None:
    """Retry storybook image generation for ``story``.

    Thin wrapper that awaits ``_complete_storybook_image_assets`` and drops
    its return value, so callers always receive ``None``.
    """
    await _complete_storybook_image_assets(
        story,
        db,
        job=job,
    )
async def _retry_audio_asset(
    story: Story,
    db: AsyncSession,
    *,
    job=None,
) -> None:
    """Retry audio generation for ``story`` with failure raising disabled.

    Awaits ``_complete_audio_asset`` with ``raise_on_failure=False`` and
    discards the result, so callers always receive ``None``.
    """
    await _complete_audio_asset(
        story,
        db,
        raise_on_failure=False,
        job=job,
    )
return public_generation_job_to_summary(job)
async def retry_story_assets(
@@ -1374,6 +1495,15 @@ async def retry_story_assets(
try:
story = await get_story_detail(story_id, user_id, db)
plan = build_asset_plan(
output_mode="asset_retry",
assets=requested_assets,
)
await record_workflow_plan(
db,
job=job,
plan=plan,
)
await record_generation_event(
db,
job=job,
@@ -1384,14 +1514,37 @@ async def retry_story_assets(
metadata={"assets": requested_assets},
)
if "image" in requested_assets:
async def retry_image() -> AssetCompletionResult:
if story.mode == "storybook":
await _retry_storybook_image_assets(story, db, job=job)
else:
await _retry_cover_image_asset(story, db, job=job)
return await _complete_storybook_image_assets(story, db, job=job)
if "audio" in requested_assets:
await _retry_audio_asset(story, db, job=job)
return await _complete_cover_image_asset(
story,
db,
last_error_prefix="封面生成失败",
log_event="cover_asset_retry_failed",
job=job,
)
async def retry_audio() -> AssetCompletionResult:
return await _complete_audio_asset(
story,
db,
raise_on_failure=False,
job=job,
)
asset_plan_result = await run_asset_plan(
plan,
image_task=retry_image if "image" in requested_assets else None,
audio_task=retry_audio if "audio" in requested_assets else None,
)
await record_executor_result(
db,
job=job,
plan=plan,
result=asset_plan_result,
)
story = await get_story_detail(story_id, user_id, db)
await finish_generation_job(
@@ -1448,13 +1601,29 @@ async def generate_story_cover(
try:
story = await get_story_detail(story_id, user_id, db)
image_result = await _complete_cover_image_asset(
story,
plan = build_asset_plan(output_mode="asset_generation", assets=["image"])
await record_workflow_plan(
db,
raise_on_failure=True,
log_event="cover_generation_failed",
job=job,
plan=plan,
)
asset_result = await run_asset_plan(
plan,
image_task=lambda: _complete_cover_image_asset(
story,
db,
raise_on_failure=True,
log_event="cover_generation_failed",
job=job,
),
)
await record_executor_result(
db,
job=job,
plan=plan,
result=asset_result,
)
image_result = asset_result.task_results[0] if asset_result.task_results else None
story = await get_story_detail(story_id, user_id, db)
await finish_generation_job(
db,
@@ -1464,7 +1633,11 @@ async def generate_story_cover(
message="Cover image generation completed.",
metadata={"assets": ["image"]},
)
if image_result.succeeded and isinstance(image_result.value, str):
if (
image_result is not None
and image_result.succeeded
and isinstance(image_result.value, str)
):
return image_result.value
except HTTPException as exc:
await finish_generation_job(
@@ -1501,12 +1674,28 @@ async def generate_story_audio(
try:
story = await get_story_detail(story_id, user_id, db)
audio_result = await _complete_audio_asset(
story,
plan = build_asset_plan(output_mode="asset_generation", assets=["audio"])
await record_workflow_plan(
db,
raise_on_failure=True,
job=job,
plan=plan,
)
asset_result = await run_asset_plan(
plan,
audio_task=lambda: _complete_audio_asset(
story,
db,
raise_on_failure=True,
job=job,
),
)
await record_executor_result(
db,
job=job,
plan=plan,
result=asset_result,
)
audio_result = asset_result.task_results[0] if asset_result.task_results else None
story = await get_story_detail(story_id, user_id, db)
await finish_generation_job(
db,
@@ -1516,7 +1705,11 @@ async def generate_story_audio(
message="Story audio generation completed.",
metadata={"assets": ["audio"]},
)
if audio_result.succeeded and isinstance(audio_result.value, bytes):
if (
audio_result is not None
and audio_result.succeeded
and isinstance(audio_result.value, bytes)
):
return audio_result.value
except HTTPException as exc:
await finish_generation_job(