Add generation harness runtime

This commit is contained in:
2026-06-21 22:31:38 +08:00
parent 7ebdfb2582
commit 459ca9edef
18 changed files with 2846 additions and 419 deletions

View File

@@ -1,9 +1,7 @@
"""Story business logic service."""
import asyncio
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Literal
from fastapi import HTTPException
from sqlalchemy import desc, select
@@ -42,6 +40,29 @@ from app.services.generation_jobs import (
get_generation_job_for_user,
record_generation_event,
)
from app.services.harness.artifacts import (
AssetCompletionResult,
asset_result_metadata,
)
from app.services.harness.asset_workflows import (
build_storybook_error_message,
complete_audio_asset,
complete_cover_image_asset,
complete_storybook_image_assets,
get_storybook_pages_data,
read_cached_audio_asset,
resolve_storybook_image_status,
)
from app.services.harness.control import (
ExecutionControl,
GenerationJobCanceledError,
)
from app.services.harness.quality_gates import (
QualityGateError,
validate_story_output,
validate_storybook_output,
)
from app.services.harness.trace import TraceRecorder
from app.services.memory_service import build_enhanced_memory_context
from app.services.provider_router import (
generate_image,
@@ -56,29 +77,6 @@ from app.tasks.achievements import extract_story_achievements
logger = get_logger(__name__)
AssetCompletionKind = Literal["cover_image", "storybook_images", "audio"]
@dataclass(frozen=True)
class AssetCompletionResult:
    """Immutable record describing how one asset completion attempt ended."""

    # NOTE(review): a class with this name is also imported from
    # app.services.harness.artifacts near the top of the file; this local
    # definition rebinds the name — confirm which one is meant to be used.

    # Which asset the attempt was for.
    asset: AssetCompletionKind
    # Status the asset ended up in.
    status: StoryAssetStatus
    # Payload produced by the attempt (text or raw bytes), if any.
    value: str | bytes | None = None
    # Failure description; None when no error was recorded.
    error: str | None = None
    # Carried alongside the result for callers; presumably marks failures
    # that must fail the main request — confirm against callers.
    blocks_main_result: bool = False

    @property
    def succeeded(self) -> bool:
        """Tell whether the asset is READY and no error was recorded."""
        is_ready = self.status == StoryAssetStatus.READY
        return is_ready and self.error is None
class GenerationJobCanceledError(Exception):
    """Signal that the user canceled a worker job while it was running."""

    # NOTE(review): the same name is imported from app.services.harness.control
    # near the top of the file; this local class rebinds it — confirm intent.
async def _record_job_event_if_present(
db: AsyncSession,
@@ -92,11 +90,7 @@ async def _record_job_event_if_present(
) -> None:
"""Append a workflow event when the caller is running under a tracked job."""
if job is None:
return
await record_generation_event(
db,
await TraceRecorder(db).record_step(
job=job,
story_id=story_id,
event_type=event_type,
@@ -114,34 +108,31 @@ async def _stop_if_job_cancel_requested(
) -> None:
"""Stop a worker-owned job at the next safe checkpoint after cancellation."""
if job is None:
return
await ExecutionControl(db).stop_if_cancel_requested(job=job, story=story)
await db.refresh(job)
if job.current_step != "cancel_requested":
return
await finish_generation_job(
async def _record_quality_gate_failure_if_present(
    db: AsyncSession,
    *,
    job,
    error: QualityGateError,
) -> None:
    """Append a quality gate failure event for tracked worker jobs.

    Args:
        db: Session forwarded to ``_record_job_event_if_present``.
        job: Generation job the failure belongs to; forwarded unchanged.
        error: The quality gate error; its ``to_metadata()`` output becomes
            the event metadata.

    This helper only records the event. It does not raise: the callers raise
    their own ``HTTPException`` right after awaiting it.
    """
    # Only quality-gate arguments are passed. The cancellation-specific
    # keywords (story, current_step, error_message, a second status/message)
    # belong to a different call; `story` is not even a parameter here.
    # NOTE(review): story_id is not supplied — assumes it is optional on
    # _record_job_event_if_present; confirm against its signature.
    await _record_job_event_if_present(
        db,
        job=job,
        event_type="quality_gate_failed",
        status="failed",
        message="Generated content failed deterministic quality gates.",
        metadata=error.to_metadata(),
    )
def _asset_result_metadata(result: AssetCompletionResult) -> dict:
"""Build JSON-safe metadata for asset workflow events."""
# NOTE(review): two return statements follow back to back, so the second
# one can never run as written. This looks like the pre- and post-refactor
# bodies interleaved by a diff rendering (the file contains "@@" hunk
# markers and no indentation) — confirm which form is meant to be live.
# Inline form: copy four fields of the result, storing the status by .value.
return {
"asset": result.asset,
"status": result.status.value,
"error": result.error,
"blocks_main_result": result.blocks_main_result,
}
# Delegating form: hand the result to asset_result_metadata.
return asset_result_metadata(result)
def _build_storybook_error_message(
@@ -151,13 +142,10 @@ def _build_storybook_error_message(
) -> str | None:
"""Summarize storybook image generation errors for the latest attempt."""
parts: list[str] = []
if cover_failed:
parts.append("封面生成失败")
if failed_pages:
pages = "".join(str(page) for page in sorted(failed_pages))
parts.append(f"{pages} 页插图生成失败")
return "".join(parts) if parts else None
return build_storybook_error_message(
cover_failed=cover_failed,
failed_pages=failed_pages,
)
def _resolve_storybook_image_status(
@@ -169,31 +157,12 @@ def _resolve_storybook_image_status(
) -> StoryAssetStatus:
"""Resolve the persisted image status for a storybook."""
if not generate_images:
return StoryAssetStatus.NOT_REQUESTED
expected_assets = 0
ready_assets = 0
if cover_prompt or cover_url:
expected_assets += 1
if cover_url:
ready_assets += 1
for page in pages_data:
if not page.get("image_prompt") and not page.get("image_url"):
continue
expected_assets += 1
if page.get("image_url"):
ready_assets += 1
if expected_assets == 0:
return StoryAssetStatus.NOT_REQUESTED
if ready_assets == expected_assets:
return StoryAssetStatus.READY
return StoryAssetStatus.FAILED
return resolve_storybook_image_status(
generate_images=generate_images,
cover_prompt=cover_prompt,
cover_url=cover_url,
pages_data=pages_data,
)
async def _prepare_generation_context(
@@ -539,93 +508,21 @@ async def _complete_cover_image_asset(
) -> AssetCompletionResult:
"""Generate or retry a text story cover through one asset workflow."""
if not story.cover_prompt:
raise HTTPException(status_code=400, detail="Story has no cover prompt")
sync_story_status(story, image_status=StoryAssetStatus.GENERATING)
await db.commit()
await _stop_if_job_cancel_requested(db, job=job, story=story)
await _record_job_event_if_present(
return await complete_cover_image_asset(
story,
db,
generate_image_func=generate_image,
raise_on_failure=raise_on_failure,
last_error_prefix=last_error_prefix,
log_event=log_event,
job=job,
story_id=story.id,
event_type="cover_image_started",
status="running",
message="Cover image generation started.",
metadata={"asset": "image", "cover_prompt_present": True},
)
try:
await _stop_if_job_cancel_requested(db, job=job, story=story)
image_url = await generate_image(
story.cover_prompt,
db=db,
user_id=story.user_id,
generation_job=job,
story_id=story.id,
)
story.image_url = image_url
sync_story_status(story, image_status=StoryAssetStatus.READY)
await db.commit()
result = AssetCompletionResult(
asset="cover_image",
status=StoryAssetStatus.READY,
value=image_url,
blocks_main_result=raise_on_failure,
)
await _record_job_event_if_present(
db,
job=job,
story_id=story.id,
event_type="cover_image_succeeded",
status="succeeded",
message="Cover image was generated.",
metadata=_asset_result_metadata(result),
)
return result
except Exception as exc:
provider_error = str(exc)
last_error = (
f"{last_error_prefix}: {provider_error}"
if last_error_prefix
else provider_error
)
sync_story_status(
story,
image_status=StoryAssetStatus.FAILED,
last_error=last_error,
)
await db.commit()
logger.warning(log_event, story_id=story.id, error=provider_error)
result = AssetCompletionResult(
asset="cover_image",
status=StoryAssetStatus.FAILED,
error=provider_error,
blocks_main_result=raise_on_failure,
)
await _record_job_event_if_present(
db,
job=job,
story_id=story.id,
event_type="cover_image_failed",
status="failed",
message="Cover image generation failed.",
metadata=_asset_result_metadata(result),
)
if raise_on_failure:
raise HTTPException(
status_code=500,
detail=f"Image generation failed: {provider_error}",
) from exc
return result
def _get_storybook_pages_data(story: Story) -> list[dict]:
"""Return mutable storybook page data from the persisted JSON field."""
# NOTE(review): two return statements follow back to back, so the second
# one can never run as written. This looks like the pre- and post-refactor
# bodies interleaved by a diff rendering — confirm which form is live.
# Inline form: shallow-copy every dict entry of story.pages; a falsy pages
# value yields an empty list and non-dict entries are dropped.
return [dict(page) for page in story.pages or [] if isinstance(page, dict)]
# Delegating form: hand the story to get_storybook_pages_data.
return get_storybook_pages_data(story)
async def _complete_storybook_image_assets(
@@ -636,176 +533,23 @@ async def _complete_storybook_image_assets(
) -> AssetCompletionResult:
"""Complete missing cover/page images for a persisted storybook."""
pages_data = _get_storybook_pages_data(story)
has_image_prompt = bool(story.cover_prompt) or any(
page.get("image_prompt") for page in pages_data
)
if not has_image_prompt:
raise HTTPException(status_code=400, detail="Storybook has no image prompts")
sync_story_status(story, image_status=StoryAssetStatus.GENERATING)
await db.commit()
await _stop_if_job_cancel_requested(db, job=job, story=story)
await _record_job_event_if_present(
db,
job=job,
story_id=story.id,
event_type="storybook_images_started",
status="running",
message="Storybook missing image completion started.",
metadata={"asset": "image"},
)
cover_failed = False
failed_pages: list[int] = []
completed_pages: list[int] = []
if story.cover_prompt and not story.image_url:
await _stop_if_job_cancel_requested(db, job=job, story=story)
try:
story.image_url = await generate_image(
story.cover_prompt,
db=db,
user_id=story.user_id,
generation_job=job,
story_id=story.id,
)
await _record_job_event_if_present(
db,
job=job,
story_id=story.id,
event_type="storybook_cover_image_succeeded",
status="succeeded",
message="Storybook cover image was generated.",
metadata={"asset": "image", "scope": "cover"},
)
except Exception as exc:
cover_failed = True
logger.warning(
"storybook_cover_asset_completion_failed",
story_id=story.id,
error=str(exc),
)
await _record_job_event_if_present(
db,
job=job,
story_id=story.id,
event_type="storybook_cover_image_failed",
status="failed",
message="Storybook cover image generation failed.",
metadata={"asset": "image", "scope": "cover", "error": str(exc)},
)
for page in pages_data:
if not page.get("image_prompt") or page.get("image_url"):
continue
await _stop_if_job_cancel_requested(db, job=job, story=story)
try:
page["image_url"] = await generate_image(
page["image_prompt"],
db=db,
user_id=story.user_id,
generation_job=job,
story_id=story.id,
)
page_number = page.get("page_number")
if isinstance(page_number, int):
completed_pages.append(page_number)
await _record_job_event_if_present(
db,
job=job,
story_id=story.id,
event_type="storybook_page_image_succeeded",
status="succeeded",
message="Storybook page image was generated.",
metadata={"asset": "image", "scope": "page", "page_number": page_number},
)
except Exception as exc:
page_number = page.get("page_number")
if isinstance(page_number, int):
failed_pages.append(page_number)
logger.warning(
"storybook_page_asset_completion_failed",
story_id=story.id,
page=page_number,
error=str(exc),
)
await _record_job_event_if_present(
db,
job=job,
story_id=story.id,
event_type="storybook_page_image_failed",
status="failed",
message="Storybook page image generation failed.",
metadata={
"asset": "image",
"scope": "page",
"page_number": page_number,
"error": str(exc),
},
)
story.pages = pages_data
error_message = _build_storybook_error_message(
cover_failed=cover_failed,
failed_pages=failed_pages,
)
image_status = _resolve_storybook_image_status(
generate_images=True,
cover_prompt=story.cover_prompt,
cover_url=story.image_url,
pages_data=pages_data,
)
sync_story_status(
return await complete_storybook_image_assets(
story,
image_status=image_status,
last_error=error_message,
)
await db.commit()
result = AssetCompletionResult(
asset="storybook_images",
status=image_status,
value=story.image_url,
error=error_message,
)
await _record_job_event_if_present(
db,
generate_image_func=generate_image,
job=job,
story_id=story.id,
event_type="storybook_images_completed",
status="failed" if error_message else "succeeded",
message="Storybook image completion finished.",
metadata={
**_asset_result_metadata(result),
"completed_pages": sorted(completed_pages),
"failed_pages": sorted(failed_pages),
},
)
return result
async def _read_cached_audio_asset(story: Story, db: AsyncSession) -> bytes | None:
"""Read cached audio or repair stale audio cache metadata."""
# NOTE(review): this block holds two bodies back to back — the inline cache
# handling below, which appears to end in `return None`, and a trailing
# delegation to read_cached_audio_asset(). Indentation is missing in this
# file, so the nesting cannot be confirmed, but as written the delegation
# looks unreachable. Probably pre-/post-refactor versions interleaved by a
# diff rendering — confirm which form is meant to be live.
# Cache hit: a path is recorded and the cache reports that it exists.
if story.audio_path and audio_cache_exists(story.audio_path):
# Bring the stored status in line with the cache before serving the audio.
if story.audio_status != StoryAssetStatus.READY.value:
sync_story_status(story, audio_status=StoryAssetStatus.READY)
await db.commit()
return read_audio_cache(story.audio_path)
# Stale metadata: a path is recorded but the cache no longer has it.
if story.audio_path and not audio_cache_exists(story.audio_path):
logger.warning(
"story_audio_cache_missing",
story_id=story.id,
audio_path=story.audio_path,
)
# Drop the dangling path; a READY status is reset to NOT_REQUESTED.
story.audio_path = None
if story.audio_status == StoryAssetStatus.READY.value:
sync_story_status(story, audio_status=StoryAssetStatus.NOT_REQUESTED)
await db.commit()
return None
# Delegating form: pass the story, session and the two cache helpers on to
# read_cached_audio_asset.
return await read_cached_audio_asset(
story,
db,
audio_cache_exists_func=audio_cache_exists,
read_audio_cache_func=read_audio_cache,
)
async def _complete_audio_asset(
@@ -817,107 +561,18 @@ async def _complete_audio_asset(
) -> AssetCompletionResult:
"""Complete TTS audio generation through one asset workflow."""
if not story.story_text:
raise HTTPException(status_code=400, detail="Story has no text")
cached_audio = await _read_cached_audio_asset(story, db)
if cached_audio is not None:
result = AssetCompletionResult(
asset="audio",
status=StoryAssetStatus.READY,
value=cached_audio,
blocks_main_result=raise_on_failure,
)
await _record_job_event_if_present(
db,
job=job,
story_id=story.id,
event_type="audio_cache_hit",
status="succeeded",
message="Cached story audio was reused.",
metadata=_asset_result_metadata(result),
)
return result
from app.services.provider_router import text_to_speech
sync_story_status(story, audio_status=StoryAssetStatus.GENERATING)
await db.commit()
await _stop_if_job_cancel_requested(db, job=job, story=story)
await _record_job_event_if_present(
return await complete_audio_asset(
story,
db,
text_to_speech_func=text_to_speech,
audio_cache_exists_func=audio_cache_exists,
read_audio_cache_func=read_audio_cache,
write_story_audio_cache_func=write_story_audio_cache,
raise_on_failure=raise_on_failure,
job=job,
story_id=story.id,
event_type="audio_started",
status="running",
message="Story audio generation started.",
metadata={"asset": "audio"},
)
try:
await _stop_if_job_cancel_requested(db, job=job, story=story)
audio_data = await text_to_speech(
story.story_text,
db=db,
user_id=story.user_id,
generation_job=job,
story_id=story.id,
)
story.audio_path = write_story_audio_cache(story.id, audio_data)
sync_story_status(
story,
audio_status=StoryAssetStatus.READY,
)
await db.commit()
result = AssetCompletionResult(
asset="audio",
status=StoryAssetStatus.READY,
value=audio_data,
blocks_main_result=raise_on_failure,
)
await _record_job_event_if_present(
db,
job=job,
story_id=story.id,
event_type="audio_succeeded",
status="succeeded",
message="Story audio was generated and cached.",
metadata=_asset_result_metadata(result),
)
return result
except Exception as exc:
provider_error = str(exc)
story.audio_path = None
sync_story_status(
story,
audio_status=StoryAssetStatus.FAILED,
last_error=provider_error,
)
await db.commit()
logger.error("audio_generation_failed", story_id=story.id, error=provider_error)
result = AssetCompletionResult(
asset="audio",
status=StoryAssetStatus.FAILED,
error=provider_error,
blocks_main_result=raise_on_failure,
)
await _record_job_event_if_present(
db,
job=job,
story_id=story.id,
event_type="audio_failed",
status="failed",
message="Story audio generation failed.",
metadata=_asset_result_metadata(result),
)
if raise_on_failure:
raise HTTPException(
status_code=500,
detail=f"Audio generation failed: {provider_error}",
) from exc
return result
async def validate_profile_and_universe(
@@ -988,6 +643,13 @@ async def generate_and_save_story(
user_id=user_id,
generation_job=job,
)
validate_story_output(result)
except QualityGateError as exc:
await _record_quality_gate_failure_if_present(db, job=job, error=exc)
raise HTTPException(
status_code=502,
detail="Story generation failed quality checks, please try again.",
) from exc
except Exception as exc:
raise HTTPException(
status_code=502,
@@ -1096,6 +758,10 @@ async def generate_storybook_service(
user_id=user_id,
generation_job=job,
)
validate_storybook_output(storybook)
except QualityGateError as exc:
await _record_quality_gate_failure_if_present(db, job=job, error=exc)
raise HTTPException(status_code=500, detail=f"故事书质量检查失败: {exc}") from exc
except Exception as e:
logger.error("storybook_generation_failed", error=str(e))
raise HTTPException(status_code=500, detail=f"故事书生成失败: {e}")