Expand generation harness observability
This commit is contained in:
@@ -36,8 +36,8 @@ from app.services.generation_jobs import (
|
||||
ensure_no_active_story_generation_job,
|
||||
finish_generation_job,
|
||||
generation_job_can_retry,
|
||||
generation_job_to_summary,
|
||||
get_generation_job_for_user,
|
||||
public_generation_job_to_summary,
|
||||
record_generation_event,
|
||||
)
|
||||
from app.services.harness.artifacts import (
|
||||
@@ -57,12 +57,27 @@ from app.services.harness.control import (
|
||||
ExecutionControl,
|
||||
GenerationJobCanceledError,
|
||||
)
|
||||
from app.services.harness.evaluators import (
|
||||
EvaluationResult,
|
||||
evaluate_story_output,
|
||||
evaluate_storybook_output,
|
||||
)
|
||||
from app.services.harness.executor import (
|
||||
record_evaluation_result,
|
||||
record_executor_result,
|
||||
record_workflow_plan,
|
||||
run_asset_plan,
|
||||
)
|
||||
from app.services.harness.plans import (
|
||||
build_asset_plan,
|
||||
build_story_plan,
|
||||
build_storybook_plan,
|
||||
)
|
||||
from app.services.harness.quality_gates import (
|
||||
QualityGateError,
|
||||
validate_story_output,
|
||||
validate_storybook_output,
|
||||
)
|
||||
from app.services.harness.trace import TraceRecorder
|
||||
from app.services.harness.types import ArtifactKind
|
||||
from app.services.memory_service import build_enhanced_memory_context
|
||||
from app.services.provider_router import (
|
||||
generate_image,
|
||||
@@ -129,6 +144,24 @@ async def _record_quality_gate_failure_if_present(
|
||||
)
|
||||
|
||||
|
||||
async def _record_evaluation_result_if_present(
    db: AsyncSession,
    *,
    job,
    evaluation: EvaluationResult,
    artifact: ArtifactKind | str = ArtifactKind.STORY_TEXT,
) -> None:
    """Forward an evaluation outcome to ``record_evaluation_result``.

    Args:
        db: Session handed through unchanged to the recorder.
        job: Generation job the evaluation belongs to.
        evaluation: Outcome to record; its ``to_metadata()`` payload and
            ``passed`` flag are read here.
        artifact: Artifact the evaluation refers to; defaults to
            ``ArtifactKind.STORY_TEXT``.
    """
    # NOTE(review): despite the ``_if_present`` suffix there is no check on
    # ``job`` in this function; presumably ``record_evaluation_result``
    # tolerates a missing job -- confirm against the executor module.
    payload = evaluation.to_metadata()
    if evaluation.passed:
        outcome = "succeeded"
    else:
        outcome = "failed"

    await record_evaluation_result(
        db,
        job=job,
        metadata=payload,
        status=outcome,
        artifact=artifact,
    )
|
||||
|
||||
|
||||
def _asset_result_metadata(result: AssetCompletionResult) -> dict:
|
||||
"""Build JSON-safe metadata for asset workflow events."""
|
||||
|
||||
@@ -643,18 +676,33 @@ async def generate_and_save_story(
|
||||
user_id=user_id,
|
||||
generation_job=job,
|
||||
)
|
||||
validate_story_output(result)
|
||||
except QualityGateError as exc:
|
||||
await _record_quality_gate_failure_if_present(db, job=job, error=exc)
|
||||
raise HTTPException(
|
||||
status_code=502,
|
||||
detail="Story generation failed quality checks, please try again.",
|
||||
) from exc
|
||||
except Exception as exc:
|
||||
raise HTTPException(
|
||||
status_code=502,
|
||||
detail="Story generation failed, please try again.",
|
||||
) from exc
|
||||
|
||||
evaluation = evaluate_story_output(
|
||||
result,
|
||||
education_theme=request.education_theme,
|
||||
)
|
||||
if evaluation.gate_error is not None:
|
||||
await _record_quality_gate_failure_if_present(
|
||||
db,
|
||||
job=job,
|
||||
error=evaluation.gate_error,
|
||||
)
|
||||
await _record_evaluation_result_if_present(
|
||||
db,
|
||||
job=job,
|
||||
evaluation=evaluation,
|
||||
)
|
||||
if evaluation.blocking:
|
||||
raise HTTPException(
|
||||
status_code=502,
|
||||
detail="Story generation failed quality checks, please try again.",
|
||||
)
|
||||
|
||||
await _record_job_event_if_present(
|
||||
db,
|
||||
job=job,
|
||||
@@ -758,13 +806,32 @@ async def generate_storybook_service(
|
||||
user_id=user_id,
|
||||
generation_job=job,
|
||||
)
|
||||
validate_storybook_output(storybook)
|
||||
except QualityGateError as exc:
|
||||
await _record_quality_gate_failure_if_present(db, job=job, error=exc)
|
||||
raise HTTPException(status_code=500, detail=f"故事书质量检查失败: {exc}") from exc
|
||||
except Exception as e:
|
||||
logger.error("storybook_generation_failed", error=str(e))
|
||||
raise HTTPException(status_code=500, detail=f"故事书生成失败: {e}")
|
||||
|
||||
evaluation = evaluate_storybook_output(
|
||||
storybook,
|
||||
education_theme=request.education_theme,
|
||||
)
|
||||
if evaluation.gate_error is not None:
|
||||
await _record_quality_gate_failure_if_present(
|
||||
db,
|
||||
job=job,
|
||||
error=evaluation.gate_error,
|
||||
)
|
||||
await _record_evaluation_result_if_present(
|
||||
db,
|
||||
job=job,
|
||||
evaluation=evaluation,
|
||||
artifact=ArtifactKind.STORYBOOK_PAGES,
|
||||
)
|
||||
if evaluation.blocking:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"故事书质量检查失败: {evaluation.gate_error or 'evaluation blocked'}",
|
||||
)
|
||||
|
||||
await _record_job_event_if_present(
|
||||
db,
|
||||
job=job,
|
||||
@@ -1025,28 +1092,50 @@ async def _generate_asset_generation_service_with_job(
|
||||
if not requested_assets:
|
||||
raise HTTPException(status_code=400, detail="资源任务缺少 assets。")
|
||||
|
||||
plan = build_asset_plan(
|
||||
output_mode="asset_generation",
|
||||
assets=requested_assets,
|
||||
)
|
||||
await record_workflow_plan(
|
||||
db,
|
||||
job=job,
|
||||
plan=plan,
|
||||
)
|
||||
|
||||
story = await get_story_detail(int(story_id), job.user_id, db)
|
||||
|
||||
if "image" in requested_assets:
|
||||
async def complete_image() -> AssetCompletionResult:
|
||||
if story.mode == "storybook":
|
||||
await _complete_storybook_image_assets(story, db, job=job)
|
||||
else:
|
||||
await _complete_cover_image_asset(
|
||||
story,
|
||||
db,
|
||||
raise_on_failure=True,
|
||||
log_event="cover_generation_failed",
|
||||
job=job,
|
||||
)
|
||||
return await _complete_storybook_image_assets(story, db, job=job)
|
||||
|
||||
if "audio" in requested_assets:
|
||||
await _complete_audio_asset(
|
||||
return await _complete_cover_image_asset(
|
||||
story,
|
||||
db,
|
||||
raise_on_failure=True,
|
||||
log_event="cover_generation_failed",
|
||||
job=job,
|
||||
)
|
||||
|
||||
async def complete_audio() -> AssetCompletionResult:
|
||||
return await _complete_audio_asset(
|
||||
story,
|
||||
db,
|
||||
raise_on_failure=True,
|
||||
job=job,
|
||||
)
|
||||
|
||||
asset_plan_result = await run_asset_plan(
|
||||
plan,
|
||||
image_task=complete_image if "image" in requested_assets else None,
|
||||
audio_task=complete_audio if "audio" in requested_assets else None,
|
||||
)
|
||||
await record_executor_result(
|
||||
db,
|
||||
job=job,
|
||||
plan=plan,
|
||||
result=asset_plan_result,
|
||||
)
|
||||
|
||||
story = await get_story_detail(story.id, job.user_id, db)
|
||||
await finish_generation_job(
|
||||
db,
|
||||
@@ -1096,7 +1185,7 @@ async def retry_generation_job_service(
|
||||
)
|
||||
await _dispatch_generation_job(db, job=retry_job)
|
||||
await db.refresh(retry_job)
|
||||
return generation_job_to_summary(retry_job)
|
||||
return public_generation_job_to_summary(retry_job)
|
||||
|
||||
|
||||
async def _generate_generation_service_with_job(
|
||||
@@ -1109,6 +1198,11 @@ async def _generate_generation_service_with_job(
|
||||
"""Run the unified generation workflow after the tracking job has been created."""
|
||||
|
||||
if request.output_mode == "storybook":
|
||||
await record_workflow_plan(
|
||||
db,
|
||||
job=job,
|
||||
plan=build_storybook_plan(generate_images=request.generate_images),
|
||||
)
|
||||
storybook = await generate_storybook_service(
|
||||
StorybookRequest(
|
||||
keywords=request.data,
|
||||
@@ -1155,6 +1249,9 @@ async def _generate_generation_service_with_job(
|
||||
retryable_assets=saved_story.retryable_assets,
|
||||
)
|
||||
|
||||
if request.output_mode == "story" and not request.generate_images:
|
||||
return await _execute_story_without_assets_plan(request, user_id, db, job=job)
|
||||
|
||||
generate_request = GenerateRequest(
|
||||
type=request.type,
|
||||
data=request.data,
|
||||
@@ -1164,6 +1261,11 @@ async def _generate_generation_service_with_job(
|
||||
)
|
||||
|
||||
if request.generate_images:
|
||||
await record_workflow_plan(
|
||||
db,
|
||||
job=job,
|
||||
plan=build_story_plan(generate_images=True),
|
||||
)
|
||||
story = await generate_full_story_service(generate_request, user_id, db, job=job)
|
||||
saved_story = await get_story_detail(story.id, user_id, db)
|
||||
await _record_postprocessing_event_if_needed(db, job=job, story=saved_story)
|
||||
@@ -1222,6 +1324,54 @@ async def _generate_generation_service_with_job(
|
||||
universe_id=story.universe_id,
|
||||
retryable_assets=story.retryable_assets,
|
||||
)
|
||||
|
||||
|
||||
async def _execute_story_without_assets_plan(
    request: GenerationRequest,
    user_id: str,
    db: AsyncSession,
    *,
    job,
) -> GenerationResponse:
    """Run the text-only story workflow under an explicitly recorded plan.

    Steps, in order: build a story plan with ``generate_images=False`` and
    record it on the job, generate and save the story, record the
    post-processing event via ``_record_postprocessing_event_if_needed``,
    mark the job finished at step ``generation_completed``, and return a
    response mirroring the saved story.

    Args:
        request: Unified generation request; only ``type``, ``data``,
            ``education_theme``, ``child_profile_id`` and ``universe_id``
            are forwarded to the story generator.
        user_id: Owner passed through to ``generate_and_save_story``.
        db: Session shared by every recording and persistence call.
        job: Tracking job; its ``id`` is returned as ``generation_job_id``.

    Returns:
        A ``GenerationResponse`` populated from the saved story.
    """
    text_only_plan = build_story_plan(generate_images=False)
    await record_workflow_plan(db, job=job, plan=text_only_plan)

    story_request = GenerateRequest(
        type=request.type,
        data=request.data,
        education_theme=request.education_theme,
        child_profile_id=request.child_profile_id,
        universe_id=request.universe_id,
    )
    saved = await generate_and_save_story(story_request, user_id, db, job=job)
    await _record_postprocessing_event_if_needed(db, job=job, story=saved)

    await finish_generation_job(
        db,
        job=job,
        story=saved,
        current_step="generation_completed",
        message="Story generation completed with a persisted readable narrative.",
    )

    # ``image_url`` and ``cover_url`` are both sourced from story.image_url.
    response_fields = {
        "id": saved.id,
        "generation_job_id": job.id,
        "title": saved.title,
        "mode": saved.mode,
        "story_text": saved.story_text,
        "cover_prompt": saved.cover_prompt,
        "image_url": saved.image_url,
        "cover_url": saved.image_url,
        "generation_status": saved.generation_status,
        "text_status": saved.text_status,
        "image_status": saved.image_status,
        "audio_status": saved.audio_status,
        "last_error": saved.last_error,
        "child_profile_id": saved.child_profile_id,
        "universe_id": saved.universe_id,
        "retryable_assets": saved.retryable_assets,
    }
    return GenerationResponse(**response_fields)
|
||||
|
||||
|
||||
async def list_stories(
|
||||
@@ -1321,36 +1471,7 @@ async def queue_story_asset_generation(
|
||||
)
|
||||
await _dispatch_generation_job(db, job=job)
|
||||
await db.refresh(job)
|
||||
return generation_job_to_summary(job)
|
||||
|
||||
|
||||
async def _retry_cover_image_asset(story: Story, db: AsyncSession, *, job=None) -> None:
    """Re-run cover image completion for a story in retry mode.

    Delegates to ``_complete_cover_image_asset`` with the retry-specific
    error prefix and log event name. The helper's result is discarded.
    """
    retry_options = {
        "last_error_prefix": "封面生成失败",
        "log_event": "cover_asset_retry_failed",
        "job": job,
    }
    await _complete_cover_image_asset(story, db, **retry_options)
|
||||
|
||||
|
||||
async def _retry_storybook_image_assets(
    story: Story,
    db: AsyncSession,
    *,
    job=None,
) -> None:
    """Re-run storybook image completion for ``story``.

    Thin wrapper over ``_complete_storybook_image_assets``; the helper's
    result is discarded so callers always receive ``None``.
    """
    pending_completion = _complete_storybook_image_assets(story, db, job=job)
    await pending_completion
|
||||
|
||||
|
||||
async def _retry_audio_asset(story: Story, db: AsyncSession, *, job=None) -> None:
    """Re-run audio completion for ``story`` with ``raise_on_failure=False``.

    The helper's result is discarded.
    """
    # NOTE(review): presumably a provider failure is recorded on the story
    # rather than raised when raise_on_failure is False -- confirm against
    # _complete_audio_asset.
    audio_options = {"raise_on_failure": False, "job": job}
    await _complete_audio_asset(story, db, **audio_options)
|
||||
return public_generation_job_to_summary(job)
|
||||
|
||||
|
||||
async def retry_story_assets(
|
||||
@@ -1374,6 +1495,15 @@ async def retry_story_assets(
|
||||
|
||||
try:
|
||||
story = await get_story_detail(story_id, user_id, db)
|
||||
plan = build_asset_plan(
|
||||
output_mode="asset_retry",
|
||||
assets=requested_assets,
|
||||
)
|
||||
await record_workflow_plan(
|
||||
db,
|
||||
job=job,
|
||||
plan=plan,
|
||||
)
|
||||
await record_generation_event(
|
||||
db,
|
||||
job=job,
|
||||
@@ -1384,14 +1514,37 @@ async def retry_story_assets(
|
||||
metadata={"assets": requested_assets},
|
||||
)
|
||||
|
||||
if "image" in requested_assets:
|
||||
async def retry_image() -> AssetCompletionResult:
|
||||
if story.mode == "storybook":
|
||||
await _retry_storybook_image_assets(story, db, job=job)
|
||||
else:
|
||||
await _retry_cover_image_asset(story, db, job=job)
|
||||
return await _complete_storybook_image_assets(story, db, job=job)
|
||||
|
||||
if "audio" in requested_assets:
|
||||
await _retry_audio_asset(story, db, job=job)
|
||||
return await _complete_cover_image_asset(
|
||||
story,
|
||||
db,
|
||||
last_error_prefix="封面生成失败",
|
||||
log_event="cover_asset_retry_failed",
|
||||
job=job,
|
||||
)
|
||||
|
||||
async def retry_audio() -> AssetCompletionResult:
|
||||
return await _complete_audio_asset(
|
||||
story,
|
||||
db,
|
||||
raise_on_failure=False,
|
||||
job=job,
|
||||
)
|
||||
|
||||
asset_plan_result = await run_asset_plan(
|
||||
plan,
|
||||
image_task=retry_image if "image" in requested_assets else None,
|
||||
audio_task=retry_audio if "audio" in requested_assets else None,
|
||||
)
|
||||
await record_executor_result(
|
||||
db,
|
||||
job=job,
|
||||
plan=plan,
|
||||
result=asset_plan_result,
|
||||
)
|
||||
|
||||
story = await get_story_detail(story_id, user_id, db)
|
||||
await finish_generation_job(
|
||||
@@ -1448,13 +1601,29 @@ async def generate_story_cover(
|
||||
|
||||
try:
|
||||
story = await get_story_detail(story_id, user_id, db)
|
||||
image_result = await _complete_cover_image_asset(
|
||||
story,
|
||||
plan = build_asset_plan(output_mode="asset_generation", assets=["image"])
|
||||
await record_workflow_plan(
|
||||
db,
|
||||
raise_on_failure=True,
|
||||
log_event="cover_generation_failed",
|
||||
job=job,
|
||||
plan=plan,
|
||||
)
|
||||
asset_result = await run_asset_plan(
|
||||
plan,
|
||||
image_task=lambda: _complete_cover_image_asset(
|
||||
story,
|
||||
db,
|
||||
raise_on_failure=True,
|
||||
log_event="cover_generation_failed",
|
||||
job=job,
|
||||
),
|
||||
)
|
||||
await record_executor_result(
|
||||
db,
|
||||
job=job,
|
||||
plan=plan,
|
||||
result=asset_result,
|
||||
)
|
||||
image_result = asset_result.task_results[0] if asset_result.task_results else None
|
||||
story = await get_story_detail(story_id, user_id, db)
|
||||
await finish_generation_job(
|
||||
db,
|
||||
@@ -1464,7 +1633,11 @@ async def generate_story_cover(
|
||||
message="Cover image generation completed.",
|
||||
metadata={"assets": ["image"]},
|
||||
)
|
||||
if image_result.succeeded and isinstance(image_result.value, str):
|
||||
if (
|
||||
image_result is not None
|
||||
and image_result.succeeded
|
||||
and isinstance(image_result.value, str)
|
||||
):
|
||||
return image_result.value
|
||||
except HTTPException as exc:
|
||||
await finish_generation_job(
|
||||
@@ -1501,12 +1674,28 @@ async def generate_story_audio(
|
||||
|
||||
try:
|
||||
story = await get_story_detail(story_id, user_id, db)
|
||||
audio_result = await _complete_audio_asset(
|
||||
story,
|
||||
plan = build_asset_plan(output_mode="asset_generation", assets=["audio"])
|
||||
await record_workflow_plan(
|
||||
db,
|
||||
raise_on_failure=True,
|
||||
job=job,
|
||||
plan=plan,
|
||||
)
|
||||
asset_result = await run_asset_plan(
|
||||
plan,
|
||||
audio_task=lambda: _complete_audio_asset(
|
||||
story,
|
||||
db,
|
||||
raise_on_failure=True,
|
||||
job=job,
|
||||
),
|
||||
)
|
||||
await record_executor_result(
|
||||
db,
|
||||
job=job,
|
||||
plan=plan,
|
||||
result=asset_result,
|
||||
)
|
||||
audio_result = asset_result.task_results[0] if asset_result.task_results else None
|
||||
story = await get_story_detail(story_id, user_id, db)
|
||||
await finish_generation_job(
|
||||
db,
|
||||
@@ -1516,7 +1705,11 @@ async def generate_story_audio(
|
||||
message="Story audio generation completed.",
|
||||
metadata={"assets": ["audio"]},
|
||||
)
|
||||
if audio_result.succeeded and isinstance(audio_result.value, bytes):
|
||||
if (
|
||||
audio_result is not None
|
||||
and audio_result.succeeded
|
||||
and isinstance(audio_result.value, bytes)
|
||||
):
|
||||
return audio_result.value
|
||||
except HTTPException as exc:
|
||||
await finish_generation_job(
|
||||
|
||||
Reference in New Issue
Block a user