feat: add generation job cancel and retry queue

This commit is contained in:
2026-04-19 18:45:34 +08:00
parent 6fb128955f
commit b89ca96e4b
18 changed files with 756 additions and 51 deletions

View File

@@ -43,7 +43,10 @@ const outputMode = ref<'full_story' | 'storybook'>('full_story')
const inputData = ref('') const inputData = ref('')
const educationTheme = ref('') const educationTheme = ref('')
const loading = ref(false) const loading = ref(false)
const canceling = ref(false)
const cancelRequested = ref(false)
const error = ref('') const error = ref('')
const activeGenerationJobId = ref<string | null>(null)
// Data // Data
interface ChildProfile { interface ChildProfile {
@@ -110,10 +113,17 @@ interface GenerationAcceptedResponse {
interface GenerationJobDetail { interface GenerationJobDetail {
story_id: number | null story_id: number | null
status: string
current_step: string
is_terminal: boolean is_terminal: boolean
error_message: string | null error_message: string | null
} }
/**
 * Subset of the job-summary payload returned by the cancel endpoint.
 * Only the two fields that `cancelGenerationJob` inspects are declared here.
 */
interface GenerationJobActionResponse {
  /** Job status; `'canceled'` means the job was stopped immediately. */
  status: string
  /** Current workflow step; `'generation_canceled'` is treated the same as a canceled status. */
  current_step: string
}
const JOB_POLL_INTERVAL_MS = 1500 const JOB_POLL_INTERVAL_MS = 1500
const JOB_POLL_MAX_ATTEMPTS = 80 const JOB_POLL_MAX_ATTEMPTS = 80
@@ -121,6 +131,9 @@ const JOB_POLL_MAX_ATTEMPTS = 80
function close() { function close() {
emit('update:modelValue', false) emit('update:modelValue', false)
error.value = '' error.value = ''
activeGenerationJobId.value = null
cancelRequested.value = false
canceling.value = false
} }
function sleep(ms: number) { function sleep(ms: number) {
@@ -132,6 +145,9 @@ function sleep(ms: number) {
async function waitForStoryId(jobId: string) { async function waitForStoryId(jobId: string) {
for (let attempt = 0; attempt < JOB_POLL_MAX_ATTEMPTS; attempt += 1) { for (let attempt = 0; attempt < JOB_POLL_MAX_ATTEMPTS; attempt += 1) {
const detail = await api.get<GenerationJobDetail>(`/api/generations/jobs/${jobId}`) const detail = await api.get<GenerationJobDetail>(`/api/generations/jobs/${jobId}`)
if (detail.status === 'canceled' || detail.current_step === 'generation_canceled') {
return null
}
if (detail.story_id) { if (detail.story_id) {
return detail.story_id return detail.story_id
} }
@@ -144,6 +160,27 @@ async function waitForStoryId(jobId: string) {
throw new Error('任务已提交,但主内容落库超时,请稍后到故事库查看最新结果') throw new Error('任务已提交,但主内容落库超时,请稍后到故事库查看最新结果')
} }
/**
 * Ask the backend to cancel the generation job this dialog is currently tracking.
 *
 * No-op when there is no tracked job, when a cancel call is already in flight,
 * or when cancellation has already been requested. If the backend reports the
 * job as canceled right away, the dialog is closed; otherwise the polling loop
 * in `generateStory` observes the cancellation later.
 */
async function cancelGenerationJob(): Promise<void> {
  // Snapshot the id: `generateStory` and `close` reset the ref while we await.
  const jobId = activeGenerationJobId.value
  if (!jobId || canceling.value || cancelRequested.value) return

  // True only while the dialog is still showing the job we asked to cancel.
  const isStillTracked = () => activeGenerationJobId.value === jobId

  canceling.value = true
  error.value = ''
  try {
    const result = await api.post<GenerationJobActionResponse>(
      `/api/generations/jobs/${jobId}/cancel`,
    )
    // The job may have finished (or the dialog closed) during the request;
    // a late response must not flip flags or close a dialog that moved on.
    if (!isStillTracked()) return
    cancelRequested.value = true
    if (result.status === 'canceled' || result.current_step === 'generation_canceled') {
      loading.value = false
      close()
    }
  } catch (e) {
    // Same reasoning: do not leave a stale error behind for the next open.
    if (isStillTracked()) {
      error.value = e instanceof Error ? e.message : '取消任务失败'
    }
  } finally {
    canceling.value = false
  }
}
async function fetchProfiles() { async function fetchProfiles() {
if (!userStore.user) return if (!userStore.user) return
profileError.value = '' profileError.value = ''
@@ -192,6 +229,8 @@ async function generateStory() {
} }
loading.value = true loading.value = true
cancelRequested.value = false
activeGenerationJobId.value = null
error.value = '' error.value = ''
try { try {
@@ -211,8 +250,13 @@ async function generateStory() {
if (!jobId) { if (!jobId) {
throw new Error('生成任务已创建,但缺少任务编号') throw new Error('生成任务已创建,但缺少任务编号')
} }
activeGenerationJobId.value = jobId
const storyId = accepted.id ?? await waitForStoryId(jobId) const storyId = accepted.id ?? await waitForStoryId(jobId)
if (storyId === null) {
close()
return
}
close() close()
if (requestedOutputMode.value === 'storybook') { if (requestedOutputMode.value === 'storybook') {
router.push(`/storybook/view/${storyId}`) router.push(`/storybook/view/${storyId}`)
@@ -223,6 +267,8 @@ async function generateStory() {
error.value = e instanceof Error ? e.message : '生成失败' error.value = e instanceof Error ? e.message : '生成失败'
} finally { } finally {
loading.value = false loading.value = false
activeGenerationJobId.value = null
cancelRequested.value = false
} }
} }
</script> </script>
@@ -253,6 +299,22 @@ async function generateStory() {
:title="generationTitle" :title="generationTitle"
:steps="generationSteps" :steps="generationSteps"
/> />
<!--
  Cancel controls shown while a generation is loading and its job id is known.
  NOTE(review): this element has its own v-if and sits directly in front of the
  v-else dialog body, so Vue pairs that v-else with THIS condition. If the v-else
  used to pair with the loading overlay above (its opening tag is not visible
  here - confirm), the form now renders whenever `loading` is true but
  `activeGenerationJobId` is still null.
-->
<div
  v-if="loading && activeGenerationJobId"
  class="fixed bottom-10 z-[110] flex flex-col items-center gap-3"
>
  <BaseButton
    variant="secondary"
    :loading="canceling"
    :disabled="cancelRequested"
    @click="cancelGenerationJob"
  >
    {{ cancelRequested ? '正在取消任务...' : '取消任务' }}
  </BaseButton>
  <p class="text-sm text-white/70">
    {{ cancelRequested ? '已提交取消请求,会在安全检查点停止任务。' : '如果是误触发起,可以现在取消后台任务。' }}
  </p>
</div>
<!-- 模态框内容 --> <!-- 模态框内容 -->
<div v-else class="relative w-full max-w-2xl max-h-[90vh] overflow-y-auto bg-[#1C2035] border border-gray-700/50 rounded-3xl shadow-2xl p-6 md:p-8"> <div v-else class="relative w-full max-w-2xl max-h-[90vh] overflow-y-auto bg-[#1C2035] border border-gray-700/50 rounded-3xl shadow-2xl p-6 md:p-8">

View File

@@ -14,6 +14,8 @@ interface GenerationJobSummary {
progress_percent: number progress_percent: number
progress_label: string progress_label: string
is_terminal: boolean is_terminal: boolean
can_cancel: boolean
can_retry: boolean
result_snapshot: Record<string, unknown> result_snapshot: Record<string, unknown>
error_message: string | null error_message: string | null
created_at: string created_at: string
@@ -63,6 +65,7 @@ const jobs = ref<GenerationJobSummary[]>([])
const activeJob = ref<GenerationJobDetail | null>(null) const activeJob = ref<GenerationJobDetail | null>(null)
const providerStats = ref<GenerationProviderStats | null>(null) const providerStats = ref<GenerationProviderStats | null>(null)
const loading = ref(false) const loading = ref(false)
const actionLoading = ref(false)
const error = ref('') const error = ref('')
let refreshTimer: ReturnType<typeof setInterval> | null = null let refreshTimer: ReturnType<typeof setInterval> | null = null
@@ -94,6 +97,7 @@ const statusClassMap: Record<string, string> = {
succeeded: 'border-emerald-200 bg-emerald-50 text-emerald-700', succeeded: 'border-emerald-200 bg-emerald-50 text-emerald-700',
completed: 'border-emerald-200 bg-emerald-50 text-emerald-700', completed: 'border-emerald-200 bg-emerald-50 text-emerald-700',
degraded_completed: 'border-orange-200 bg-orange-50 text-orange-700', degraded_completed: 'border-orange-200 bg-orange-50 text-orange-700',
canceled: 'border-slate-200 bg-slate-100 text-slate-700',
failed: 'border-rose-200 bg-rose-50 text-rose-700', failed: 'border-rose-200 bg-rose-50 text-rose-700',
} }
@@ -108,6 +112,7 @@ function statusLabel(status?: string) {
succeeded: '成功', succeeded: '成功',
completed: '已完成', completed: '已完成',
degraded_completed: '降级完成', degraded_completed: '降级完成',
canceled: '已取消',
failed: '失败', failed: '失败',
} }
return labels[status ?? ''] ?? '未知' return labels[status ?? ''] ?? '未知'
@@ -117,6 +122,8 @@ function eventLabel(eventType: string) {
const labels: Record<string, string> = { const labels: Record<string, string> = {
request_accepted: '请求接收', request_accepted: '请求接收',
worker_started: '后台任务开始', worker_started: '后台任务开始',
retry_queued: '重新排队',
cancel_requested: '已请求取消',
context_prepared: '上下文准备', context_prepared: '上下文准备',
narrative_generated: '正文生成', narrative_generated: '正文生成',
story_saved: '故事保存', story_saved: '故事保存',
@@ -137,6 +144,7 @@ function eventLabel(eventType: string) {
asset_retry_started: '资源重试开始', asset_retry_started: '资源重试开始',
asset_retry_completed: '资源重试完成', asset_retry_completed: '资源重试完成',
asset_retry_failed: '资源重试失败', asset_retry_failed: '资源重试失败',
generation_canceled: '任务已取消',
generation_completed: '生成完成', generation_completed: '生成完成',
generation_failed: '生成失败', generation_failed: '生成失败',
} }
@@ -213,6 +221,36 @@ async function refresh() {
} }
} }
/**
 * Shared driver for the job action buttons: POSTs `/api/generations/jobs/{id}/{action}`
 * for the active job, then reloads the panel through `refresh()`.
 *
 * Does nothing when no job is selected or another action is still running.
 * Failures are surfaced through `error`, using `fallbackMessage` when the
 * thrown value is not an `Error`.
 */
async function runActiveJobAction(
  action: 'cancel' | 'retry',
  fallbackMessage: string,
): Promise<void> {
  // Snapshot the job so the URL cannot change if `activeJob` is swapped mid-flight.
  const job = activeJob.value
  if (!job || actionLoading.value) return

  actionLoading.value = true
  error.value = ''
  try {
    await api.post(`/api/generations/jobs/${job.id}/${action}`)
    await refresh()
  } catch (e) {
    error.value = e instanceof Error ? e.message : fallbackMessage
  } finally {
    actionLoading.value = false
  }
}

/** Request cancellation of the job currently shown in the detail panel. */
async function cancelActiveJob(): Promise<void> {
  await runActiveJobAction('cancel', '取消任务失败')
}

/** Queue a retry for the job currently shown in the detail panel. */
async function retryActiveJob(): Promise<void> {
  await runActiveJobAction('retry', '重新排队失败')
}
function stopAutoRefresh() { function stopAutoRefresh() {
if (refreshTimer) { if (refreshTimer) {
clearInterval(refreshTimer) clearInterval(refreshTimer)
@@ -334,10 +372,30 @@ defineExpose({ refresh })
当前步骤{{ eventLabel(activeJob.current_step) }} 当前步骤{{ eventLabel(activeJob.current_step) }}
</div> </div>
</div> </div>
<div class="flex flex-wrap items-center justify-end gap-2">
<button
v-if="activeJob.can_cancel"
type="button"
class="rounded-full border border-amber-200 bg-amber-50 px-3 py-1 text-xs font-medium text-amber-700 transition hover:bg-amber-100 disabled:cursor-not-allowed disabled:opacity-60"
:disabled="actionLoading"
@click="cancelActiveJob"
>
{{ actionLoading ? '处理中...' : '取消任务' }}
</button>
<button
v-if="activeJob.can_retry"
type="button"
class="rounded-full border border-sky-200 bg-sky-50 px-3 py-1 text-xs font-medium text-sky-700 transition hover:bg-sky-100 disabled:cursor-not-allowed disabled:opacity-60"
:disabled="actionLoading"
@click="retryActiveJob"
>
{{ actionLoading ? '处理中...' : '重新排队' }}
</button>
<span class="rounded-full border px-3 py-1 text-xs font-medium" :class="statusClass(activeJob.status)"> <span class="rounded-full border px-3 py-1 text-xs font-medium" :class="statusClass(activeJob.status)">
{{ statusLabel(activeJob.status) }} {{ statusLabel(activeJob.status) }}
</span> </span>
</div> </div>
</div>
<div> <div>
<div class="mb-1 flex items-center justify-between text-xs" :class="mutedClass"> <div class="mb-1 flex items-center justify-between text-xs" :class="mutedClass">

View File

@@ -40,6 +40,7 @@ from app.services.generation_jobs import (
get_user_generation_ops_summary, get_user_generation_ops_summary,
get_user_provider_analytics, get_user_provider_analytics,
list_story_generation_jobs, list_story_generation_jobs,
request_generation_job_cancel,
) )
from app.services.memory_service import build_enhanced_memory_context from app.services.memory_service import build_enhanced_memory_context
from app.services.provider_router import ( from app.services.provider_router import (
@@ -88,6 +89,32 @@ async def get_generation_job(
return await get_generation_job_detail(db, job_id=job_id, user_id=user.id) return await get_generation_job_detail(db, job_id=job_id, user_id=user.id)
@router.post(
    "/generations/jobs/{job_id}/cancel",
    response_model=GenerationJobSummaryResponse,
)
async def cancel_generation_job(
    job_id: str,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Request cancellation for one queued/running generation job."""
    # Ownership checks and state handling live in the service; the route only
    # forwards the authenticated user's id and returns the resulting summary.
    job_summary = await request_generation_job_cancel(
        db,
        job_id=job_id,
        user_id=user.id,
    )
    return job_summary
@router.post(
    "/generations/jobs/{job_id}/retry",
    response_model=GenerationJobSummaryResponse,
)
async def retry_generation_job(
    job_id: str,
    user: User = Depends(require_user),
    db: AsyncSession = Depends(get_db),
):
    """Queue one new generation job from a failed/canceled terminal job."""
    # The service validates that the source job may be retried and returns the
    # summary of the newly queued job.
    retry_summary = await story_service.retry_generation_job_service(job_id, user.id, db)
    return retry_summary
@router.get( @router.get(
"/generations/ops-summary", "/generations/ops-summary",
response_model=GenerationOpsSummaryResponse, response_model=GenerationOpsSummaryResponse,

View File

@@ -195,6 +195,8 @@ class GenerationJobSummaryResponse(BaseModel):
progress_percent: int progress_percent: int
progress_label: str progress_label: str
is_terminal: bool is_terminal: bool
can_cancel: bool = False
can_retry: bool = False
result_snapshot: dict[str, Any] = Field(default_factory=dict) result_snapshot: dict[str, Any] = Field(default_factory=dict)
error_message: str | None = None error_message: str | None = None
created_at: datetime created_at: datetime

View File

@@ -16,6 +16,26 @@ from app.db.models import GenerationJob, GenerationJobEvent, Story
logger = get_logger(__name__) logger = get_logger(__name__)
def _is_terminal_status(status: str) -> bool:
return status in {"completed", "degraded_completed", "failed", "canceled"}
def _job_supports_queue_control(job: GenerationJob) -> bool:
return job.output_mode in {"story", "storybook"}
def generation_job_can_cancel(job: GenerationJob) -> bool:
    """Return True when a cancel request may still be submitted for ``job``.

    The job must support queue control, be in the ``running`` status, and not
    already be sitting at the ``cancel_requested`` step.
    """
    if not _job_supports_queue_control(job):
        return False
    if job.status != "running":
        return False
    return job.current_step != "cancel_requested"
def generation_job_can_retry(job: GenerationJob) -> bool:
    """Return True when ``job`` may be queued again.

    Only jobs that support queue control and ended as ``failed`` or
    ``canceled`` qualify.
    """
    if not _job_supports_queue_control(job):
        return False
    retryable_statuses = frozenset(("failed", "canceled"))
    return job.status in retryable_statuses
def _story_snapshot(story: Story | None) -> dict[str, Any]: def _story_snapshot(story: Story | None) -> dict[str, Any]:
if story is None: if story is None:
return {} return {}
@@ -50,6 +70,13 @@ def _job_progress(job: GenerationJob) -> dict[str, Any]:
"is_terminal": True, "is_terminal": True,
} }
if job.status == "canceled":
return {
"progress_percent": 100,
"progress_label": "已取消",
"is_terminal": True,
}
if job.status in {"completed", "degraded_completed"}: if job.status in {"completed", "degraded_completed"}:
return { return {
"progress_percent": 100, "progress_percent": 100,
@@ -59,7 +86,9 @@ def _job_progress(job: GenerationJob) -> dict[str, Any]:
progress_map: dict[str, tuple[int, str]] = { progress_map: dict[str, tuple[int, str]] = {
"request_accepted": (5, "已接收请求"), "request_accepted": (5, "已接收请求"),
"retry_queued": (8, "重新排队中"),
"worker_started": (12, "后台任务已开始"), "worker_started": (12, "后台任务已开始"),
"cancel_requested": (15, "已请求取消"),
"context_prepared": (20, "上下文已准备"), "context_prepared": (20, "上下文已准备"),
"narrative_generated": (45, "正文已生成"), "narrative_generated": (45, "正文已生成"),
"story_saved": (60, "主记录已保存"), "story_saved": (60, "主记录已保存"),
@@ -83,6 +112,7 @@ def _job_progress(job: GenerationJob) -> dict[str, Any]:
"postprocessing_queued": (90, "后处理已排队"), "postprocessing_queued": (90, "后处理已排队"),
"asset_generation_completed": (100, "资源已完成"), "asset_generation_completed": (100, "资源已完成"),
"asset_retry_completed": (100, "资源重试完成"), "asset_retry_completed": (100, "资源重试完成"),
"generation_canceled": (100, "任务已取消"),
"generation_completed": (100, "生成完成"), "generation_completed": (100, "生成完成"),
"generation_stale_failed": (100, "任务超时已收敛"), "generation_stale_failed": (100, "任务超时已收敛"),
} }
@@ -106,6 +136,8 @@ def _is_stale_job(job: GenerationJob, *, stale_after_minutes: int) -> bool:
def _failure_label(job: GenerationJob) -> str: def _failure_label(job: GenerationJob) -> str:
if job.status == "canceled":
return "任务已取消"
if job.current_step == "generation_stale_failed": if job.current_step == "generation_stale_failed":
return "任务超时" return "任务超时"
if job.output_mode == "asset_retry": if job.output_mode == "asset_retry":
@@ -196,7 +228,7 @@ async def claim_generation_job_for_worker(
.where( .where(
GenerationJob.id == job_id, GenerationJob.id == job_id,
GenerationJob.status == "running", GenerationJob.status == "running",
GenerationJob.current_step == "request_accepted", GenerationJob.current_step.in_(["request_accepted", "retry_queued"]),
) )
.values(current_step="worker_started") .values(current_step="worker_started")
) )
@@ -283,6 +315,8 @@ def generation_job_to_summary(job: GenerationJob) -> dict[str, Any]:
"status": job.status, "status": job.status,
"current_step": job.current_step, "current_step": job.current_step,
**progress, **progress,
"can_cancel": generation_job_can_cancel(job),
"can_retry": generation_job_can_retry(job),
"result_snapshot": job.result_snapshot or {}, "result_snapshot": job.result_snapshot or {},
"error_message": job.error_message, "error_message": job.error_message,
"created_at": job.created_at, "created_at": job.created_at,
@@ -290,6 +324,88 @@ def generation_job_to_summary(job: GenerationJob) -> dict[str, Any]:
} }
async def get_generation_job_for_user(
    db: AsyncSession,
    *,
    job_id: str,
    user_id: str,
) -> GenerationJob:
    """Fetch a generation job by id, restricted to jobs owned by ``user_id``.

    Raises:
        HTTPException: 404 when no job matches both the id and the owner.
    """
    owned_job_query = select(GenerationJob).where(
        GenerationJob.id == job_id,
        GenerationJob.user_id == user_id,
    )
    job = (await db.execute(owned_job_query)).scalar_one_or_none()
    if job is not None:
        return job
    # A job owned by someone else is reported exactly like a missing one.
    raise HTTPException(status_code=404, detail="Generation job not found")
async def request_generation_job_cancel(
    db: AsyncSession,
    *,
    job_id: str,
    user_id: str,
) -> dict[str, Any]:
    """Cancel a user's generation job, or flag it for cancellation.

    Outcomes, checked in this order:

    * job mode without queue control -> 409
    * job already ``canceled`` -> current summary (idempotent)
    * job in any other terminal status -> 409
    * job already at ``cancel_requested`` -> current summary (idempotent)
    * job still at ``request_accepted`` / ``retry_queued`` -> finished as
      ``canceled`` immediately
    * otherwise -> a ``cancel_requested`` event is recorded and the summary of
      the refreshed job is returned; the worker is expected to stop later.

    Raises:
        HTTPException: 404 from the ownership lookup, 409 as described above.
    """
    job = await get_generation_job_for_user(db, job_id=job_id, user_id=user_id)

    if not _job_supports_queue_control(job):
        raise HTTPException(status_code=409, detail="当前任务不支持取消")
    if job.status == "canceled":
        return generation_job_to_summary(job)
    if _is_terminal_status(job.status):
        raise HTTPException(status_code=409, detail="当前任务已终止,无法取消")
    if job.current_step == "cancel_requested":
        return generation_job_to_summary(job)

    worker_not_started = job.current_step in {"request_accepted", "retry_queued"}

    if not worker_not_started:
        # A worker is already past the queue steps: leave a marker and let it
        # stop on its own at its next cancellation check.
        requested_from_step = job.current_step
        job.error_message = "Cancellation requested by user."
        await record_generation_event(
            db,
            job=job,
            story_id=job.story_id,
            event_type="cancel_requested",
            status="running",
            message="Cancellation requested; worker will stop at the next safe checkpoint.",
            metadata={"requested_from_step": requested_from_step},
            commit=False,
        )
        await db.commit()
        await db.refresh(job)
        return generation_job_to_summary(job)

    # Still queued: finish the job as canceled right away, passing along the
    # linked story (if any) that belongs to the same user.
    linked_story = None
    if job.story_id is not None:
        story_query = select(Story).where(
            Story.id == job.story_id,
            Story.user_id == job.user_id,
        )
        linked_story = (await db.execute(story_query)).scalar_one_or_none()

    await finish_generation_job(
        db,
        job=job,
        story=linked_story,
        status="canceled",
        current_step="generation_canceled",
        error_message="Generation canceled by user before worker execution started.",
        message="Generation job was canceled before worker execution started.",
    )
    return generation_job_to_summary(job)
async def get_generation_job_detail( async def get_generation_job_detail(
db: AsyncSession, db: AsyncSession,
*, *,

View File

@@ -37,6 +37,9 @@ from app.services.generation_jobs import (
create_generation_job, create_generation_job,
ensure_no_active_story_generation_job, ensure_no_active_story_generation_job,
finish_generation_job, finish_generation_job,
generation_job_can_retry,
generation_job_to_summary,
get_generation_job_for_user,
record_generation_event, record_generation_event,
) )
from app.services.memory_service import build_enhanced_memory_context from app.services.memory_service import build_enhanced_memory_context
@@ -73,6 +76,10 @@ class AssetCompletionResult:
return self.status == StoryAssetStatus.READY and self.error is None return self.status == StoryAssetStatus.READY and self.error is None
class GenerationJobCanceledError(Exception):
    """Raised when a running worker job has been canceled by the user.

    ``_stop_if_job_cancel_requested`` raises it after marking the job as
    canceled; ``execute_generation_job_service`` catches it and returns a
    canceled response instead of going through its failure handling.
    """
async def _record_job_event_if_present( async def _record_job_event_if_present(
db: AsyncSession, db: AsyncSession,
*, *,
@@ -99,6 +106,33 @@ async def _record_job_event_if_present(
) )
async def _stop_if_job_cancel_requested(
db: AsyncSession,
*,
job,
story: Story | None = None,
) -> None:
"""Stop a worker-owned job at the next safe checkpoint after cancellation."""
if job is None:
return
await db.refresh(job)
if job.current_step != "cancel_requested":
return
await finish_generation_job(
db,
job=job,
story=story,
status="canceled",
current_step="generation_canceled",
error_message="Generation canceled by user.",
message="Generation job was canceled after a user request.",
)
raise GenerationJobCanceledError()
def _asset_result_metadata(result: AssetCompletionResult) -> dict: def _asset_result_metadata(result: AssetCompletionResult) -> dict:
"""Build JSON-safe metadata for asset workflow events.""" """Build JSON-safe metadata for asset workflow events."""
@@ -192,6 +226,7 @@ async def _prepare_generation_context(
"has_memory_context": bool(memory_context), "has_memory_context": bool(memory_context),
}, },
) )
await _stop_if_job_cancel_requested(db, job=job)
return resolved_profile_id, resolved_universe_id, memory_context return resolved_profile_id, resolved_universe_id, memory_context
@@ -318,6 +353,7 @@ async def _generate_storybook_image_assets(
] ]
logger.info("storybook_parallel_generation_start", page_count=len(storybook.pages)) logger.info("storybook_parallel_generation_start", page_count=len(storybook.pages))
await _stop_if_job_cancel_requested(db, job=job)
await _record_job_event_if_present( await _record_job_event_if_present(
db, db,
job=job, job=job,
@@ -334,6 +370,7 @@ async def _generate_storybook_image_assets(
nonlocal cover_failed nonlocal cover_failed
if storybook.cover_prompt and not storybook.cover_url: if storybook.cover_prompt and not storybook.cover_url:
await _stop_if_job_cancel_requested(db, job=job)
try: try:
return await generate_image( return await generate_image(
storybook.cover_prompt, storybook.cover_prompt,
@@ -350,6 +387,7 @@ async def _generate_storybook_image_assets(
if not page.image_prompt or page.image_url: if not page.image_prompt or page.image_url:
return return
await _stop_if_job_cancel_requested(db, job=job)
try: try:
page.image_url = await generate_image( page.image_url = await generate_image(
page.image_prompt, page.image_prompt,
@@ -506,6 +544,7 @@ async def _complete_cover_image_asset(
sync_story_status(story, image_status=StoryAssetStatus.GENERATING) sync_story_status(story, image_status=StoryAssetStatus.GENERATING)
await db.commit() await db.commit()
await _stop_if_job_cancel_requested(db, job=job, story=story)
await _record_job_event_if_present( await _record_job_event_if_present(
db, db,
job=job, job=job,
@@ -517,6 +556,7 @@ async def _complete_cover_image_asset(
) )
try: try:
await _stop_if_job_cancel_requested(db, job=job, story=story)
image_url = await generate_image( image_url = await generate_image(
story.cover_prompt, story.cover_prompt,
db=db, db=db,
@@ -605,6 +645,7 @@ async def _complete_storybook_image_assets(
sync_story_status(story, image_status=StoryAssetStatus.GENERATING) sync_story_status(story, image_status=StoryAssetStatus.GENERATING)
await db.commit() await db.commit()
await _stop_if_job_cancel_requested(db, job=job, story=story)
await _record_job_event_if_present( await _record_job_event_if_present(
db, db,
job=job, job=job,
@@ -620,6 +661,7 @@ async def _complete_storybook_image_assets(
completed_pages: list[int] = [] completed_pages: list[int] = []
if story.cover_prompt and not story.image_url: if story.cover_prompt and not story.image_url:
await _stop_if_job_cancel_requested(db, job=job, story=story)
try: try:
story.image_url = await generate_image( story.image_url = await generate_image(
story.cover_prompt, story.cover_prompt,
@@ -658,6 +700,7 @@ async def _complete_storybook_image_assets(
if not page.get("image_prompt") or page.get("image_url"): if not page.get("image_prompt") or page.get("image_url"):
continue continue
await _stop_if_job_cancel_requested(db, job=job, story=story)
try: try:
page["image_url"] = await generate_image( page["image_url"] = await generate_image(
page["image_prompt"], page["image_prompt"],
@@ -800,6 +843,7 @@ async def _complete_audio_asset(
sync_story_status(story, audio_status=StoryAssetStatus.GENERATING) sync_story_status(story, audio_status=StoryAssetStatus.GENERATING)
await db.commit() await db.commit()
await _stop_if_job_cancel_requested(db, job=job, story=story)
await _record_job_event_if_present( await _record_job_event_if_present(
db, db,
job=job, job=job,
@@ -811,6 +855,7 @@ async def _complete_audio_asset(
) )
try: try:
await _stop_if_job_cancel_requested(db, job=job, story=story)
audio_data = await text_to_speech( audio_data = await text_to_speech(
story.story_text, story.story_text,
db=db, db=db,
@@ -933,6 +978,7 @@ async def generate_and_save_story(
) )
try: try:
await _stop_if_job_cancel_requested(db, job=job)
result = await generate_story_content( result = await generate_story_content(
input_type=request.type, input_type=request.type,
data=request.data, data=request.data,
@@ -955,8 +1001,9 @@ async def generate_and_save_story(
message="Story narrative was generated.", message="Story narrative was generated.",
metadata={"mode": result.mode, "title": result.title}, metadata={"mode": result.mode, "title": result.title},
) )
await _stop_if_job_cancel_requested(db, job=job)
return await _persist_text_story_result( story = await _persist_text_story_result(
result=result, result=result,
user_id=user_id, user_id=user_id,
profile_id=profile_id, profile_id=profile_id,
@@ -964,6 +1011,8 @@ async def generate_and_save_story(
db=db, db=db,
job=job, job=job,
) )
await _stop_if_job_cancel_requested(db, job=job, story=story)
return story
async def generate_full_story_service( async def generate_full_story_service(
@@ -975,6 +1024,7 @@ async def generate_full_story_service(
) -> FullStoryResponse: ) -> FullStoryResponse:
"""Generate story with parallel image generation.""" """Generate story with parallel image generation."""
story = await generate_and_save_story(request, user_id, db, job=job) story = await generate_and_save_story(request, user_id, db, job=job)
await _stop_if_job_cancel_requested(db, job=job, story=story)
image_url: str | None = None image_url: str | None = None
errors: dict[str, str | None] = {} errors: dict[str, str | None] = {}
@@ -1036,6 +1086,7 @@ async def generate_storybook_service(
) )
try: try:
await _stop_if_job_cancel_requested(db, job=job)
storybook = await generate_storybook( storybook = await generate_storybook(
keywords=request.keywords, keywords=request.keywords,
page_count=request.page_count, page_count=request.page_count,
@@ -1060,12 +1111,14 @@ async def generate_storybook_service(
"page_count": len(storybook.pages), "page_count": len(storybook.pages),
}, },
) )
await _stop_if_job_cancel_requested(db, job=job)
final_cover_url = storybook.cover_url final_cover_url = storybook.cover_url
cover_failed = False cover_failed = False
failed_pages: list[int] = [] failed_pages: list[int] = []
if request.generate_images: if request.generate_images:
await _stop_if_job_cancel_requested(db, job=job)
( (
final_cover_url, final_cover_url,
cover_failed, cover_failed,
@@ -1089,6 +1142,7 @@ async def generate_storybook_service(
db=db, db=db,
job=job, job=job,
) )
await _stop_if_job_cancel_requested(db, job=job, story=story)
response_pages = _storybook_pages_to_response(pages_data) response_pages = _storybook_pages_to_response(pages_data)
@@ -1124,6 +1178,18 @@ async def generate_generation_service(
request_payload=request.model_dump(mode="json"), request_payload=request.model_dump(mode="json"),
) )
await _dispatch_generation_job(db, job=job)
return _build_queued_generation_response(request, job_id=job.id)
async def _dispatch_generation_job(
db: AsyncSession,
*,
job: GenerationJob,
) -> None:
"""Dispatch one accepted generation job to the background worker."""
try: try:
from app.tasks.generation_workflow import run_generation_workflow_task from app.tasks.generation_workflow import run_generation_workflow_task
@@ -1144,8 +1210,6 @@ async def generate_generation_service(
detail="后台生成任务派发失败,请确认 worker 可用后重试。", detail="后台生成任务派发失败,请确认 worker 可用后重试。",
) from exc ) from exc
return _build_queued_generation_response(request, job_id=job.id)
def _build_queued_generation_response( def _build_queued_generation_response(
request: GenerationRequest, request: GenerationRequest,
@@ -1184,6 +1248,8 @@ async def execute_generation_job_service(
db, db,
job=job, job=job,
) )
except GenerationJobCanceledError:
return _build_canceled_generation_response(job)
except HTTPException as exc: except HTTPException as exc:
await finish_generation_job( await finish_generation_job(
db, db,
@@ -1210,6 +1276,24 @@ async def execute_generation_job_service(
return response return response
def _build_canceled_generation_response(job: GenerationJob) -> GenerationResponse:
"""Build a compact response for a worker job that ended as canceled."""
snapshot = job.result_snapshot or {}
return GenerationResponse(
id=snapshot.get("story_id"),
generation_job_id=job.id,
title="生成任务已取消",
mode="storybook" if job.output_mode == "storybook" else "generated",
generation_status=str(snapshot.get("generation_status") or "failed"),
text_status=str(snapshot.get("text_status") or "failed"),
image_status=str(snapshot.get("image_status") or "not_requested"),
audio_status=str(snapshot.get("audio_status") or "not_requested"),
last_error=str(snapshot.get("last_error") or "Generation canceled by user."),
retryable_assets=list(snapshot.get("retryable_assets") or []),
)
async def run_generation_job_service( async def run_generation_job_service(
job_id: str, job_id: str,
db: AsyncSession, db: AsyncSession,
@@ -1225,6 +1309,46 @@ async def run_generation_job_service(
return job return job
async def retry_generation_job_service(
    job_id: str,
    user_id: str,
    db: AsyncSession,
) -> dict:
    """Queue a fresh job that repeats a failed or canceled one.

    The source job is left untouched; a new job is created from its mode,
    input type, payload and story link, a ``retry_queued`` event is recorded on
    it, and it is dispatched to the worker.

    Returns:
        Summary dict of the newly created job (not of the source job).

    Raises:
        HTTPException: 404 when the source job is not the caller's, 409 when it
            is not retryable. Other errors raised by the active-job guard or
            the dispatcher propagate unchanged.
    """
    source_job = await get_generation_job_for_user(db, job_id=job_id, user_id=user_id)
    if not generation_job_can_retry(source_job):
        raise HTTPException(status_code=409, detail="当前任务还不能重新排队")

    if source_job.story_id is not None:
        # Run the active-job guard for the linked story before cloning.
        await ensure_no_active_story_generation_job(
            db,
            story_id=source_job.story_id,
            user_id=user_id,
        )

    cloned_fields = {
        "user_id": user_id,
        "output_mode": source_job.output_mode,
        "input_type": source_job.input_type,
        "request_payload": source_job.request_payload or {},
        "story_id": source_job.story_id,
    }
    retry_job = await create_generation_job(db, **cloned_fields)

    await record_generation_event(
        db,
        job=retry_job,
        story_id=retry_job.story_id,
        event_type="retry_queued",
        status="queued",
        message="Retry job accepted from a previous terminal generation.",
        metadata={"source_job_id": source_job.id},
    )
    await _dispatch_generation_job(db, job=retry_job)

    # Reload so the summary reflects whatever the event and dispatch persisted.
    await db.refresh(retry_job)
    return generation_job_to_summary(retry_job)
async def _generate_generation_service_with_job( async def _generate_generation_service_with_job(
request: GenerationRequest, request: GenerationRequest,
user_id: str, user_id: str,

View File

@@ -15,6 +15,8 @@ from app.services.adapters.storybook.primary import Storybook, StorybookPage
from app.services.adapters.text.models import StoryOutput from app.services.adapters.text.models import StoryOutput
from app.services.generation_jobs import ( from app.services.generation_jobs import (
create_generation_job, create_generation_job,
finish_generation_job,
get_generation_job_detail,
mark_stale_generation_jobs, mark_stale_generation_jobs,
record_generation_event, record_generation_event,
) )
@@ -847,3 +849,187 @@ async def test_retry_assets_rejects_when_story_has_active_job(
assert "已有运行中的任务" in response.json()["detail"] assert "已有运行中的任务" in response.json()["detail"]
finally: finally:
app.dependency_overrides.clear() app.dependency_overrides.clear()
async def test_cancel_queued_generation_job_marks_it_canceled(
    db_session,
    auth_token,
    test_user,
):
    """Canceling a job that no worker has picked up yet ends it as canceled at once."""

    async def override_get_db():
        yield db_session

    app.dependency_overrides[get_db] = override_get_db
    transport = ASGITransport(app=app)
    task_delay_path = "app.tasks.generation_workflow.run_generation_workflow_task.delay"

    try:
        # The dispatch call is patched, so no worker runs and the job stays at
        # its initial request_accepted step.
        with patch(task_delay_path) as mock_delay:
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                client.cookies.set("access_token", auth_token)
                response = await client.post(
                    "/api/generations",
                    json={
                        "output_mode": "story",
                        "type": "keywords",
                        "data": "小狐狸, 月亮船",
                        "generate_images": False,
                    },
                )

                assert response.status_code == 202
                job_id = response.json()["generation_job_id"]
                mock_delay.assert_called_once_with(job_id)

                cancel_response = await client.post(f"/api/generations/jobs/{job_id}/cancel")

                assert cancel_response.status_code == 200
                canceled_job = cancel_response.json()
                assert canceled_job["status"] == "canceled"
                assert canceled_job["current_step"] == "generation_canceled"
                # A canceled job can no longer be canceled, but may be retried.
                assert canceled_job["can_cancel"] is False
                assert canceled_job["can_retry"] is True

        detail = await get_generation_job_detail(
            db_session,
            job_id=job_id,
            user_id=test_user.id,
        )
        # No cancel_requested event: the queued path goes straight to terminal.
        assert [event["event_type"] for event in detail["events"]] == [
            "request_accepted",
            "generation_canceled",
        ]
    finally:
        app.dependency_overrides.clear()
async def test_cancel_running_generation_job_marks_cancel_requested(
    db_session,
    auth_token,
    test_user,
):
    """Cancelling a job whose worker already started only flags the request.

    The job is created through the service layer and given a `worker_started`
    event recorded with status `running`. The cancel endpoint is expected to
    keep `status == "running"`, switch `current_step` to `cancel_requested`,
    disable both cancel and retry, and store the cancellation note on the row.
    """

    async def override_get_db():
        yield db_session

    # Build the fixture data BEFORE installing the dependency override. If one
    # of these awaited calls raises, nothing has been registered on the app
    # yet, so no override can leak into tests that run afterwards.
    job = await create_generation_job(
        db_session,
        user_id=test_user.id,
        output_mode="story",
        input_type="keywords",
        request_payload={
            "output_mode": "story",
            "type": "keywords",
            "data": "小熊, 森林",
            "generate_images": False,
        },
    )
    await record_generation_event(
        db_session,
        job=job,
        event_type="worker_started",
        status="running",
        message="Generation worker started processing the accepted request.",
    )
    transport = ASGITransport(app=app)

    # From here on every statement is covered by the `finally` cleanup.
    app.dependency_overrides[get_db] = override_get_db
    try:
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            client.cookies.set("access_token", auth_token)
            response = await client.post(f"/api/generations/jobs/{job.id}/cancel")

        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "running"
        assert data["current_step"] == "cancel_requested"
        assert data["can_cancel"] is False
        assert data["can_retry"] is False

        # Re-read the row to confirm the request was persisted, not just echoed.
        refreshed_job = (
            await db_session.execute(select(GenerationJob).where(GenerationJob.id == job.id))
        ).scalar_one()
        assert refreshed_job.current_step == "cancel_requested"
        assert refreshed_job.error_message == "Cancellation requested by user."
    finally:
        app.dependency_overrides.clear()
async def test_retry_failed_generation_job_requeues_new_worker_job(
    db_session,
    auth_token,
    test_user,
    mock_text_provider,
):
    """Retrying a failed job creates a new job, enqueues it, and it can complete.

    A job is created and finished as `failed` through the service layer. The
    retry endpoint must answer with a different job id in step `retry_queued`
    and dispatch exactly that id to the (patched) worker task. The new job is
    then run in-process and must finish as `completed`, with its first three
    events being acceptance, retry queueing, and worker start.
    """

    async def override_get_db():
        yield db_session

    task_delay_path = "app.tasks.generation_workflow.run_generation_workflow_task.delay"

    # Prepare the failed job BEFORE installing the dependency override. If one
    # of these awaited calls raises, nothing has been registered on the app
    # yet, so no override can leak into tests that run afterwards.
    failed_job = await create_generation_job(
        db_session,
        user_id=test_user.id,
        output_mode="story",
        input_type="keywords",
        request_payload={
            "output_mode": "story",
            "type": "keywords",
            "data": "小鹿, 星星",
            "generate_images": False,
        },
    )
    await finish_generation_job(
        db_session,
        job=failed_job,
        story=None,
        status="failed",
        current_step="generation_failed",
        error_message="upstream timeout",
        message="Generation failed before a durable story result was available.",
    )
    transport = ASGITransport(app=app)

    # From here on every statement is covered by the `finally` cleanup.
    app.dependency_overrides[get_db] = override_get_db
    try:
        with patch(task_delay_path) as mock_delay:
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                client.cookies.set("access_token", auth_token)
                response = await client.post(f"/api/generations/jobs/{failed_job.id}/retry")

            assert response.status_code == 200
            data = response.json()
            assert data["id"] != failed_job.id
            assert data["status"] == "running"
            assert data["current_step"] == "retry_queued"
            assert data["can_cancel"] is True
            assert data["can_retry"] is False
            mock_delay.assert_called_once_with(data["id"])

            # The Celery dispatch is mocked, so drive the new job directly.
            retried_job_id = data["id"]
            await run_generation_job_service(retried_job_id, db_session)

            retried_job = (
                await db_session.execute(
                    select(GenerationJob).where(GenerationJob.id == retried_job_id)
                )
            ).scalar_one()
            assert retried_job.status == "completed"
            assert retried_job.current_step == "generation_completed"

            events = (
                await db_session.execute(
                    select(GenerationJobEvent)
                    .where(GenerationJobEvent.job_id == retried_job_id)
                    .order_by(GenerationJobEvent.id)
                )
            ).scalars().all()
            # Only the head of the timeline is pinned; later events are not asserted here.
            assert [event.event_type for event in events[:3]] == [
                "request_accepted",
                "retry_queued",
                "worker_started",
            ]
    finally:
        app.dependency_overrides.clear()

View File

@@ -126,7 +126,7 @@ DreamWeaver 是面向 3-8 岁亲子场景的个性化 AI 绘本与陪伴式讲
### 2:20 - 3:00 取舍与下一步 ### 2:20 - 3:00 取舍与下一步
求职版优先稳定闭环和可解释性,不做支付、多租户和复杂监控。现在 job/event 已能查询 workflow、资产补全、provider 调用轨迹和聚合指标,用户端和管理端也能展示生成轨迹与跨故事 Provider 运营摘要;下一步会迁移到后台 worker。 求职版优先稳定闭环和可解释性,不做支付、多租户和复杂监控。现在 job/event 已能查询 workflow、资产补全、provider 调用轨迹和聚合指标,用户端和管理端也能展示生成轨迹与跨故事 Provider 运营摘要;统一生成也已经迁移到后台 worker,下一步是补取消/重试队列
--- ---

View File

@@ -51,7 +51,7 @@ SMOKE_AUDIO=1 ./scripts/demo_smoke.sh
- **AI 不确定性处理**:主内容和资产拆开,图片/音频失败不阻塞阅读。 - **AI 不确定性处理**:主内容和资产拆开,图片/音频失败不阻塞阅读。
- **Provider 产品化**:用户看到稳定能力,系统内部用 Capability / Provider / Adapter / Routing Policy 管供应链。 - **Provider 产品化**:用户看到稳定能力,系统内部用 Capability / Provider / Adapter / Routing Policy 管供应链。
- **可观测性**:generation job/event 让生成过程、失败恢复和 Provider 成本可解释。 - **可观测性**:generation job/event 让生成过程、失败恢复和 Provider 成本可解释。
- **可继续生产化**:前端已有轮询形态,后端已有任务事件模型,下一步可以迁移到 worker。 - **可继续生产化**:统一生成已经迁移到 worker,前端轮询和任务事件模型也已打通,下一步是补取消/重试队列和更完整监控。
--- ---
@@ -63,4 +63,4 @@ SMOKE_AUDIO=1 ./scripts/demo_smoke.sh
| 图片生成失败 | 展示 `degraded_completed` 与资源重试 | | 图片生成失败 | 展示 `degraded_completed` 与资源重试 |
| Docker 冷启动慢 | 演示前先跑 smoke 并保持容器运行 | | Docker 冷启动慢 | 演示前先跑 smoke 并保持容器运行 |
| Provider 追问过深 | 回到 Capability / Provider / Adapter / Routing Policy 四层解释 | | Provider 追问过深 | 回到 Capability / Provider / Adapter / Routing Policy 四层解释 |
| 生产化追问 | 说明下一步是 worker 化、监控告警、密钥治理和 Provider analytics 扩展 | | 生产化追问 | 说明下一步是取消/重试队列、监控告警、密钥治理和 Provider analytics 扩展 |

View File

@@ -83,4 +83,4 @@ AI 生成产品最大的问题不是“能不能调模型”,而是结果不
### 这个项目下一步怎么上线? ### 这个项目下一步怎么上线?
会先把当前轻量 job/event 模型迁移到后台 worker 和进度轮询,再补跨时间窗口的 provider 运营分析。生产上线前还需要补真实用户鉴权配置、密钥管理、监控告警和部署策略。 已经把当前轻量 job/event 模型迁移到后台 worker,并打通了前端进度轮询;下一步会补取消/重试队列,再继续扩展跨时间窗口和跨用户维度的 provider 运营分析。生产上线前还需要补真实用户鉴权配置、密钥管理、监控告警和部署策略。

View File

@@ -71,6 +71,7 @@ Week 2 已完成演示闭环、统一生成工作流、generation job/event、
| W4-08 | Ops | 任务运行概览与失败摘要 | `GET /api/generations/ops-summary` + 最近失败列表 | P1 | Done | | W4-08 | Ops | 任务运行概览与失败摘要 | `GET /api/generations/ops-summary` + 最近失败列表 | P1 | Done |
| W4-09 | Workflow | 卡住任务自动收敛 | `GENERATION_JOB_STALE_MINUTES` + Celery beat stale job maintenance | P1 | Done | | W4-09 | Workflow | 卡住任务自动收敛 | `GENERATION_JOB_STALE_MINUTES` + Celery beat stale job maintenance | P1 | Done |
| W4-10 | Workflow | 防止重复资产任务 | 运行中故事拒绝重复封面/音频/资产重试请求 | P1 | Done | | W4-10 | Workflow | 防止重复资产任务 | 运行中故事拒绝重复封面/音频/资产重试请求 | P1 | Done |
| W4-11 | Workflow | 生成任务取消与重新排队 | 取消已提交任务,失败/取消任务可重新排队 | P1 | Done |
--- ---

View File

@@ -18,6 +18,11 @@
- 时间线能展示阅读记录与记忆沉淀 - 时间线能展示阅读记录与记忆沉淀
- Week 4 已补齐绘本阅读位置恢复。 - Week 4 已补齐绘本阅读位置恢复。
- Week 4 已输出架构说明和 Demo 包装文档。 - Week 4 已输出架构说明和 Demo 包装文档。
- 生产化主线已继续推进:
- `POST /api/generations` 已迁移到后台 worker
- 创建弹窗会先拿到 `generation_job_id`,再轮询主记录落库
- 统一生成链路的 smoke、测试和前端构建已跟进到异步语义
- 首版取消/重试队列已落地,支持取消已提交任务和从失败/取消任务重新排队
--- ---
@@ -43,7 +48,7 @@ DreamWeaver 已经具备求职演示所需的完整闭环:
最近一轮验证包括: 最近一轮验证包括:
- 后端全量测试:91 passed - 后端全量测试:94 passed
- 后端 ruff:通过 - 后端 ruff:通过
- 用户端生产构建:通过 - 用户端生产构建:通过
- 管理端生产构建:通过 - 管理端生产构建:通过
@@ -56,10 +61,9 @@ DreamWeaver 已经具备求职演示所需的完整闭环:
| Priority | Task | Why | | Priority | Task | Why |
| --- | --- | --- | | --- | --- | --- |
| P0 | 将同步生成迁移到 Celery worker | 支持真实长任务、断点恢复和后台进度 | | P0 | 跨用户 / 跨环境 Provider dashboard | 当前已支持单用户摘要,后续要支持运营视角 |
| P1 | 生成任务取消与重试队列 | 防止重复任务和用户误触造成浪费 |
| P1 | 跨用户 / 跨环境 Provider dashboard | 当前已支持单用户摘要,后续要支持运营视角 |
| P1 | 监控告警与结构化 dashboard | 目前已有故事库级概览,后续要接入更完整观测体系 | | P1 | 监控告警与结构化 dashboard | 目前已有故事库级概览,后续要接入更完整观测体系 |
| P1 | 断点续跑与更细粒度任务控制 | 让取消、重试和 worker 恢复更稳 |
| P2 | 更细粒度叙事风格与音色策略 | 扩展体验,但不影响当前求职版主线 | | P2 | 更细粒度叙事风格与音色策略 | 扩展体验,但不影响当前求职版主线 |
--- ---

View File

@@ -68,21 +68,25 @@ DreamWeaver 当前同时支持普通故事生成、完整故事生成和绘本
- 已新增任务运行概览 `GET /api/generations/ops-summary`,故事库可展示最近失败、运行中任务和超时待收敛任务 - 已新增任务运行概览 `GET /api/generations/ops-summary`,故事库可展示最近失败、运行中任务和超时待收敛任务
- 重复资产任务已加入保护:同一故事存在运行中 job 时,不再重复触发封面、音频或统一资产重试 - 重复资产任务已加入保护:同一故事存在运行中 job 时,不再重复触发封面、音频或统一资产重试
- Celery beat 已支持定时收敛卡住的 generation job避免任务长期停在 running - Celery beat 已支持定时收敛卡住的 generation job避免任务长期停在 running
- 用户端与管理端生成轨迹组件会在任务未终止时自动轮询,为后续后台 worker 进度流保留前端形态 - 用户端与管理端生成轨迹组件会在任务未终止时自动轮询,已经可直接消费后台 worker 进度流
- `POST /api/generations` 响应已返回 `generation_job_id`smoke 脚本会验证 job 查询与 story job history - `POST /api/generations` 响应已返回 `generation_job_id`smoke 脚本会验证 job 查询与 story job history
- 用户端与管理端的故事详情页、绘本阅读页已接入生成轨迹,展示生成/重试任务、关键事件、Provider 调用结果和聚合指标 - 用户端与管理端的故事详情页、绘本阅读页已接入生成轨迹,展示生成/重试任务、关键事件、Provider 调用结果和聚合指标
- 故事详情页封面补全已切换到统一资产重试入口 - 故事详情页封面补全已切换到统一资产重试入口
- 管理端前端构建阻塞已修复,主前端与 admin 前端均可完成生产构建 - 管理端前端构建阻塞已修复,主前端与 admin 前端均可完成生产构建
- 已补首版生成任务控制能力:
- `POST /api/generations/jobs/{job_id}/cancel`
- `POST /api/generations/jobs/{job_id}/retry`
- 创建弹窗与生成轨迹都可触发取消或重新排队
### Remaining Production Work ### Remaining Production Work
- 普通故事、完整生成、绘本生成已有统一外部入口,内部 workflow 仍可继续减少兼容层分支 - 普通故事、完整生成、绘本生成已有统一外部入口,内部 workflow 仍可继续减少兼容层分支
- 统一资产重试入口已覆盖普通故事封面、绘本缺失插图和故事音频,后续可继续扩展更细的资产级审计 - 统一资产重试入口已覆盖普通故事封面、绘本缺失插图和故事音频,后续可继续扩展更细的资产级审计
- 后台异步 worker 执行、断点续跑、跨用户/跨环境 Provider 分析,以及真正的取消/重试队列仍属于后续生产化增强 - 断点续跑、跨用户/跨环境 Provider 分析,以及更细粒度的任务控制策略仍属于后续生产化增强
### What This Means ### What This Means
这份 PRD 仍然保留目标态设计,但主干能力已经可在当前代码中演示。当前最适合的继续方式,是继续把同步请求迁移到后台 worker把当前首版运营摘要扩展为可筛选、可对比的分析视角,而不是继续扩大功能范围。 这份 PRD 仍然保留目标态设计,但主干能力已经可在当前代码中演示。当前最适合的继续方式,是在已落地的 worker 化与任务控制基础上,把当前首版运营摘要扩展为可筛选、可对比的分析视角,并逐步补断点续跑和更完整监控,而不是继续扩大功能范围。
--- ---
@@ -93,13 +97,13 @@ DreamWeaver 当前同时支持普通故事生成、完整故事生成和绘本
DreamWeaver 当前存在以下工作流层面问题: DreamWeaver 当前存在以下工作流层面问题:
1. **生成入口已建立,内部路径正在收束** 1. **生成入口已建立,内部路径正在收束**
当前前端已切到 `/api/generations`,旧的 `/api/stories/generate``/api/stories/generate/full``/api/storybook/generate` 仍作为兼容入口保留。service 内部已抽取上下文准备、主记录保存、封面补全、绘本插图补全和音频补全 helper并用 `AssetCompletionResult` 表达资产补全结果。generation job/event 已落库并可查询Provider 调用轨迹、单故事聚合指标和跨故事运营摘要也已进入用户端与管理端展示。下一步重点是为后台异步 worker 复用这些事件 当前前端已切到 `/api/generations`,旧的 `/api/stories/generate``/api/stories/generate/full``/api/storybook/generate` 仍作为兼容入口保留。service 内部已抽取上下文准备、主记录保存、封面补全、绘本插图补全和音频补全 helper并用 `AssetCompletionResult` 表达资产补全结果。generation job/event 已落库并可查询Provider 调用轨迹、单故事聚合指标和跨故事运营摘要也已进入用户端与管理端展示;统一生成请求现在已经交给后台 worker 执行。下一步重点是把取消/重试队列也接到这套事件模型上
2. **保存与资产补全过程正在统一** 2. **保存与资产补全过程正在统一**
文本故事和绘本已拥有更清晰的主记录保存 helper普通故事封面、绘本缺失插图、故事音频生成/缓存已共用各自的 asset completion helper。服务层已经能表达资产任务结果并会把统一入口、资产重试、绘本逐页插图和音频生成的关键节点写入 job event。 文本故事和绘本已拥有更清晰的主记录保存 helper普通故事封面、绘本缺失插图、故事音频生成/缓存已共用各自的 asset completion helper。服务层已经能表达资产任务结果并会把统一入口、资产重试、绘本逐页插图和音频生成的关键节点写入 job event。
3. **状态表达已基本统一,仍需生产化扩展** 3. **状态表达已基本统一,仍需生产化扩展**
当前已经能用 `generation_status``text_status``image_status``audio_status``retryable_assets` 表达生成中、部分可读、完成、降级完成、失败和可重试。后续重点是让后台 worker、运营分析和通知系统复用同一套状态语义。 当前已经能用 `generation_status``text_status``image_status``audio_status``retryable_assets` 表达生成中、部分可读、完成、降级完成、失败和可重试。后续重点是让取消请求、重新排队、运营分析和通知系统复用同一套状态语义。
4. **失败处理策略不统一** 4. **失败处理策略不统一**
图片、音频、绘本页生成失败时,系统没有统一的降级定义,用户体验和技术行为都不够稳定。 图片、音频、绘本页生成失败时,系统没有统一的降级定义,用户体验和技术行为都不够稳定。

View File

@@ -100,8 +100,7 @@ flowchart LR
当前仍是求职版 MVP不引入复杂工作流引擎。下一步生产化优先级 当前仍是求职版 MVP不引入复杂工作流引擎。下一步生产化优先级
1. 把同步生成迁移到后台 worker 1. 补齐生成任务取消与重新排队能力,减少误触和重复消耗
2. 基于现有 job 查询和前端轮询扩展真实异步进度。 2. 基于现有 job 查询和前端轮询继续扩展真实异步进度与任务控制。
3. 扩展 Provider analytics 的时间窗口、失败原因和跨用户维度。 3. 扩展 Provider analytics 的时间窗口、失败原因和跨用户维度。
4. 为音频缓存增加过期策略和后台清理任务。 4. 继续补充部署、监控告警和密钥治理策略。
5. 补充部署、监控告警和密钥治理策略。

View File

@@ -6,7 +6,7 @@
已新增轻量 `generation_jobs``generation_job_events` 表,但不引入复杂工作流引擎。 已新增轻量 `generation_jobs``generation_job_events` 表,但不引入复杂工作流引擎。
原因是当前 MVP 的生成方式仍然以同步请求为主:后端在一次请求中完成主内容保存,再补全封面、绘本插图或语音。用户最关心的是“这个故事现在能不能读、哪些资产可补全”;系统侧则需要有足够的轨迹说明“这次生成做到了哪一步、哪里失败、哪些资产还能重试”。 原因是当前 MVP 已经进入“请求接收与后台执行分离”的阶段:`POST /api/generations` 先接受请求并返回 `generation_job_id`,再由 Celery worker 完成主内容保存和后续资产补全。用户最关心的是“这个故事现在能不能读、任务跑到哪一步、哪些资产可补全”;系统侧则需要有足够的轨迹说明“这次生成做到了哪一步、哪里失败、是否被取消、哪些资产还能重试”。
因此当前采用轻量落库策略: 因此当前采用轻量落库策略:
@@ -43,22 +43,24 @@ job 响应会返回 `progress_percent`、`progress_label` 和 `is_terminal`
- 音频缓存由 `STORY_AUDIO_CACHE_TTL_DAYS` 控制过期时间Celery beat 会每日清理。 - 音频缓存由 `STORY_AUDIO_CACHE_TTL_DAYS` 控制过期时间Celery beat 会每日清理。
- 生成任务由 `GENERATION_JOB_STALE_MINUTES` 控制卡住阈值Celery beat 会每 30 分钟扫描一次,将超时运行中的任务标记为 `generation_stale_failed` - 生成任务由 `GENERATION_JOB_STALE_MINUTES` 控制卡住阈值Celery beat 会每 30 分钟扫描一次,将超时运行中的任务标记为 `generation_stale_failed`
- 当某个故事已经有运行中的 job 时,封面补全、音频生成和统一资产重试会直接拒绝重复请求,避免用户连点造成重复成本。 - 当某个故事已经有运行中的 job 时,封面补全、音频生成和统一资产重试会直接拒绝重复请求,避免用户连点造成重复成本。
- 统一生成请求已由 Celery worker 执行,前端会先拿到 `generation_job_id`,再轮询 job 详情直到主记录落库或任务终止。
- 当前已支持首版任务控制:队列中的任务可直接取消,运行中的任务可在安全检查点取消,失败或已取消任务可重新排队。
## 什么时候需要落库 job ## 为什么当前仍然需要扩展 job 模型
如果后续进入真实生产化,需要扩展当前 job/event 模型: 虽然 worker 化已经完成,但如果继续进入真实生产化,仍然需要扩展当前 job/event 模型:
- 生成流程改成真正异步,前端需要轮询后台 worker 的实时进度 - 单个故事会产生多次生成尝试,需要对比每次任务的 provider 表现、取消原因、重试原因和资产结果
- 单个故事会产生多次生成尝试,需要对比每次任务的 provider 表现、重试原因和资产结果。
- 需要展示比当前事件更细颗粒度的步骤,例如 prompt 构建、provider 选择依据、provider failover 原因、每次调用 token/图片/语音成本。 - 需要展示比当前事件更细颗粒度的步骤,例如 prompt 构建、provider 选择依据、provider failover 原因、每次调用 token/图片/语音成本。
- 需要按 provider、成本、延迟和失败原因做运营分析。 - 需要按 provider、成本、延迟和失败原因做运营分析。
- 需要继续扩展取消与重试队列的颗粒度,例如更细的中断点、任务依赖和断点续跑策略。
- 需要断点续跑,避免 Worker 重启后丢失中间状态。 - 需要断点续跑,避免 Worker 重启后丢失中间状态。
## 推荐未来扩展 ## 推荐未来扩展
当前已有两层记录,未来可以继续扩展字段和事件颗粒度: 当前已有两层记录,未来可以继续扩展字段和事件颗粒度:
- 将同步生成请求迁移到真正异步 worker 后,继续复用现有 job 查询和前端轮询进度条。 - 继续复用现有 job 查询和前端轮询进度条,为取消请求、重新排队和长任务通知提供统一入口
- 将当前跨故事 provider 指标扩展为跨用户、跨环境和更细颗粒度的失败原因维度分析。 - 将当前跨故事 provider 指标扩展为跨用户、跨环境和更细颗粒度的失败原因维度分析。
## 面试表达 ## 面试表达

View File

@@ -43,7 +43,10 @@ const outputMode = ref<'full_story' | 'storybook'>('full_story')
const inputData = ref('') const inputData = ref('')
const educationTheme = ref('') const educationTheme = ref('')
const loading = ref(false) const loading = ref(false)
const canceling = ref(false)
const cancelRequested = ref(false)
const error = ref('') const error = ref('')
const activeGenerationJobId = ref<string | null>(null)
// Data // Data
interface ChildProfile { interface ChildProfile {
@@ -110,10 +113,17 @@ interface GenerationAcceptedResponse {
/** Fields of the job detail payload (`GET /api/generations/jobs/:id`) that this dialog reads. */
interface GenerationJobDetail {
  /** Id of the saved story; polling keeps going while this is still empty. */
  story_id: number | null
  /** Job status string, e.g. `'canceled'`. */
  status: string
  /** Name of the step the job is currently at, e.g. `'generation_canceled'`. */
  current_step: string
  is_terminal: boolean
  error_message: string | null
}

/** The part of the job payload that is read from the cancel endpoint's response. */
type GenerationJobActionResponse = Pick<GenerationJobDetail, 'status' | 'current_step'>
const JOB_POLL_INTERVAL_MS = 1500 const JOB_POLL_INTERVAL_MS = 1500
const JOB_POLL_MAX_ATTEMPTS = 80 const JOB_POLL_MAX_ATTEMPTS = 80
@@ -121,6 +131,9 @@ const JOB_POLL_MAX_ATTEMPTS = 80
/**
 * Dismisses the dialog: asks the parent to hide it through `update:modelValue`,
 * then clears the error text and all per-job cancel tracking state.
 */
function close() {
  emit('update:modelValue', false)

  // Forget the tracked job so the next run starts from a clean slate.
  canceling.value = false
  cancelRequested.value = false
  activeGenerationJobId.value = null
  error.value = ''
}
function sleep(ms: number) { function sleep(ms: number) {
@@ -132,6 +145,9 @@ function sleep(ms: number) {
async function waitForStoryId(jobId: string) { async function waitForStoryId(jobId: string) {
for (let attempt = 0; attempt < JOB_POLL_MAX_ATTEMPTS; attempt += 1) { for (let attempt = 0; attempt < JOB_POLL_MAX_ATTEMPTS; attempt += 1) {
const detail = await api.get<GenerationJobDetail>(`/api/generations/jobs/${jobId}`) const detail = await api.get<GenerationJobDetail>(`/api/generations/jobs/${jobId}`)
if (detail.status === 'canceled' || detail.current_step === 'generation_canceled') {
return null
}
if (detail.story_id) { if (detail.story_id) {
return detail.story_id return detail.story_id
} }
@@ -144,6 +160,27 @@ async function waitForStoryId(jobId: string) {
throw new Error('任务已提交,但主内容落库超时,请稍后到故事库查看最新结果') throw new Error('任务已提交,但主内容落库超时,请稍后到故事库查看最新结果')
} }
/**
 * Asks the backend to cancel the generation job this dialog is tracking.
 *
 * Does nothing when no job is tracked, or when a cancel call is already in
 * flight or has already been accepted. If the response reports the job as
 * canceled, loading is dropped and the dialog closes; otherwise
 * `cancelRequested` stays set so the UI can show the pending cancellation.
 *
 * The result of the request is ignored when the dialog is no longer tracking
 * the same job by the time it arrives.
 */
async function cancelGenerationJob() {
  const jobId = activeGenerationJobId.value
  if (!jobId || canceling.value || cancelRequested.value) return

  // The tracked job can change while the request is in flight (generation
  // finished, dialog closed, a new run started). A late response or error
  // must not touch state that no longer belongs to this job.
  const isStillTracked = () => activeGenerationJobId.value === jobId

  canceling.value = true
  error.value = ''

  try {
    const result = await api.post<GenerationJobActionResponse>(
      `/api/generations/jobs/${jobId}/cancel`,
    )
    if (!isStillTracked()) return

    cancelRequested.value = true
    if (result.status === 'canceled' || result.current_step === 'generation_canceled') {
      loading.value = false
      close()
    }
  } catch (e) {
    if (isStillTracked()) {
      error.value = e instanceof Error ? e.message : '取消任务失败'
    }
  } finally {
    canceling.value = false
  }
}
async function fetchProfiles() { async function fetchProfiles() {
if (!userStore.user) return if (!userStore.user) return
profileError.value = '' profileError.value = ''
@@ -192,6 +229,8 @@ async function generateStory() {
} }
loading.value = true loading.value = true
cancelRequested.value = false
activeGenerationJobId.value = null
error.value = '' error.value = ''
try { try {
@@ -211,8 +250,13 @@ async function generateStory() {
if (!jobId) { if (!jobId) {
throw new Error('生成任务已创建,但缺少任务编号') throw new Error('生成任务已创建,但缺少任务编号')
} }
activeGenerationJobId.value = jobId
const storyId = accepted.id ?? await waitForStoryId(jobId) const storyId = accepted.id ?? await waitForStoryId(jobId)
if (storyId === null) {
close()
return
}
close() close()
if (requestedOutputMode.value === 'storybook') { if (requestedOutputMode.value === 'storybook') {
router.push(`/storybook/view/${storyId}`) router.push(`/storybook/view/${storyId}`)
@@ -223,6 +267,8 @@ async function generateStory() {
error.value = e instanceof Error ? e.message : '生成失败' error.value = e instanceof Error ? e.message : '生成失败'
} finally { } finally {
loading.value = false loading.value = false
activeGenerationJobId.value = null
cancelRequested.value = false
} }
} }
</script> </script>
@@ -253,6 +299,22 @@ async function generateStory() {
:title="generationTitle" :title="generationTitle"
:steps="generationSteps" :steps="generationSteps"
/> />
<!--
  Cancel controls, shown while a generation is running and its job id is known.
  They live inside a `v-if="loading"` template on purpose: the modal content
  below uses `v-else`, which binds to the nearest preceding conditional sibling.
  Putting `loading && activeGenerationJobId` directly on this element would make
  the modal form render during loading whenever the job id is not set yet.
  NOTE(review): assumes the overlay element above is itself gated on `loading` — confirm.
-->
<template v-if="loading">
  <div
    v-if="activeGenerationJobId"
    class="fixed bottom-10 z-[110] flex flex-col items-center gap-3"
  >
    <BaseButton
      variant="secondary"
      :loading="canceling"
      :disabled="cancelRequested"
      @click="cancelGenerationJob"
    >
      {{ cancelRequested ? '正在取消任务...' : '取消任务' }}
    </BaseButton>
    <p class="text-sm text-white/70">
      {{ cancelRequested ? '已提交取消请求,会在安全检查点停止任务。' : '如果是误触发起,可以现在取消后台任务。' }}
    </p>
  </div>
</template>
<!-- 模态框内容 --> <!-- 模态框内容 -->
<div v-else class="relative w-full max-w-2xl max-h-[90vh] overflow-y-auto bg-[#1C2035] border border-gray-700/50 rounded-3xl shadow-2xl p-6 md:p-8"> <div v-else class="relative w-full max-w-2xl max-h-[90vh] overflow-y-auto bg-[#1C2035] border border-gray-700/50 rounded-3xl shadow-2xl p-6 md:p-8">

View File

@@ -28,6 +28,7 @@ const jobHistory = ref<GenerationJobSummary[]>([])
const activeJob = ref<GenerationJobDetail | null>(null) const activeJob = ref<GenerationJobDetail | null>(null)
const providerStats = ref<GenerationProviderStats | null>(null) const providerStats = ref<GenerationProviderStats | null>(null)
const loading = ref(false) const loading = ref(false)
const actionLoading = ref(false)
const error = ref('') const error = ref('')
let refreshTimer: ReturnType<typeof setInterval> | null = null let refreshTimer: ReturnType<typeof setInterval> | null = null
@@ -79,6 +80,7 @@ const jobStatusClassMap: Record<string, string> = {
succeeded: 'border-emerald-200 bg-emerald-50 text-emerald-700', succeeded: 'border-emerald-200 bg-emerald-50 text-emerald-700',
completed: 'border-emerald-200 bg-emerald-50 text-emerald-700', completed: 'border-emerald-200 bg-emerald-50 text-emerald-700',
degraded_completed: 'border-orange-200 bg-orange-50 text-orange-700', degraded_completed: 'border-orange-200 bg-orange-50 text-orange-700',
canceled: 'border-slate-200 bg-slate-100 text-slate-700',
failed: 'border-rose-200 bg-rose-50 text-rose-700', failed: 'border-rose-200 bg-rose-50 text-rose-700',
} }
@@ -93,6 +95,7 @@ function getJobStatusLabel(status?: string) {
succeeded: '成功', succeeded: '成功',
completed: '已完成', completed: '已完成',
degraded_completed: '降级完成', degraded_completed: '降级完成',
canceled: '已取消',
failed: '失败', failed: '失败',
} }
return labels[status ?? ''] ?? '未知' return labels[status ?? ''] ?? '未知'
@@ -102,6 +105,8 @@ function getEventLabel(eventType: string) {
const labels: Record<string, string> = { const labels: Record<string, string> = {
request_accepted: '请求接收', request_accepted: '请求接收',
worker_started: '后台任务开始', worker_started: '后台任务开始',
retry_queued: '重新排队',
cancel_requested: '已请求取消',
context_prepared: '上下文准备', context_prepared: '上下文准备',
narrative_generated: '正文生成', narrative_generated: '正文生成',
story_saved: '故事保存', story_saved: '故事保存',
@@ -124,6 +129,7 @@ function getEventLabel(eventType: string) {
asset_retry_started: '资源重试开始', asset_retry_started: '资源重试开始',
asset_retry_completed: '资源重试完成', asset_retry_completed: '资源重试完成',
asset_retry_failed: '资源重试失败', asset_retry_failed: '资源重试失败',
generation_canceled: '任务已取消',
generation_completed: '生成完成', generation_completed: '生成完成',
generation_failed: '生成失败', generation_failed: '生成失败',
asset_generation_completed: '资源生成完成', asset_generation_completed: '资源生成完成',
@@ -202,6 +208,36 @@ async function refresh() {
} }
} }
/**
 * Sends a cancel request for the job currently shown as active, then reloads
 * the panel data. Ignored when there is no active job or another cancel/retry
 * action is still running. Failures are surfaced through `error`.
 */
async function cancelActiveJob() {
  const job = activeJob.value
  if (!job || actionLoading.value) {
    return
  }

  error.value = ''
  actionLoading.value = true
  try {
    const endpoint = `/api/generations/jobs/${job.id}/cancel`
    await api.post(endpoint)
    await refresh()
  } catch (e) {
    if (e instanceof Error) {
      error.value = e.message
    } else {
      error.value = '取消任务失败'
    }
  } finally {
    actionLoading.value = false
  }
}
/**
 * Sends a retry request for the job currently shown as active, then reloads
 * the panel data. Ignored when there is no active job or another cancel/retry
 * action is still running. Failures are surfaced through `error`.
 */
async function retryActiveJob() {
  const job = activeJob.value
  if (!job || actionLoading.value) {
    return
  }

  error.value = ''
  actionLoading.value = true
  try {
    const endpoint = `/api/generations/jobs/${job.id}/retry`
    await api.post(endpoint)
    await refresh()
  } catch (e) {
    if (e instanceof Error) {
      error.value = e.message
    } else {
      error.value = '重新排队失败'
    }
  } finally {
    actionLoading.value = false
  }
}
function stopAutoRefresh() { function stopAutoRefresh() {
if (refreshTimer) { if (refreshTimer) {
clearInterval(refreshTimer) clearInterval(refreshTimer)
@@ -323,10 +359,30 @@ defineExpose({ refresh })
当前步骤{{ getEventLabel(activeJob.current_step) }} 当前步骤{{ getEventLabel(activeJob.current_step) }}
</div> </div>
</div> </div>
<!-- Job actions (cancel / retry, as allowed by the job flags) followed by the status badge -->
<div class="flex flex-wrap items-center justify-end gap-2">
  <!-- Cancel: rendered only when the job reports can_cancel -->
  <button
    v-if="activeJob.can_cancel"
    :disabled="actionLoading"
    class="rounded-full border border-amber-200 bg-amber-50 px-3 py-1 text-xs font-medium text-amber-700 transition hover:bg-amber-100 disabled:cursor-not-allowed disabled:opacity-60"
    type="button"
    @click="cancelActiveJob"
  >
    {{ actionLoading ? '处理中...' : '取消任务' }}
  </button>
  <!-- Retry: rendered only when the job reports can_retry -->
  <button
    v-if="activeJob.can_retry"
    :disabled="actionLoading"
    class="rounded-full border border-sky-200 bg-sky-50 px-3 py-1 text-xs font-medium text-sky-700 transition hover:bg-sky-100 disabled:cursor-not-allowed disabled:opacity-60"
    type="button"
    @click="retryActiveJob"
  >
    {{ actionLoading ? '处理中...' : '重新排队' }}
  </button>
  <!-- Status badge; its colors come from getJobStatusClass -->
  <span
    :class="getJobStatusClass(activeJob.status)"
    class="rounded-full border px-3 py-1 text-xs font-medium"
  >
    {{ getJobStatusLabel(activeJob.status) }}
  </span>
</div>
</div>
<div> <div>
<div class="mb-1 flex items-center justify-between text-xs" :class="mutedTextClass"> <div class="mb-1 flex items-center justify-between text-xs" :class="mutedTextClass">

View File

@@ -8,6 +8,8 @@ export interface GenerationJobSummary {
progress_percent: number progress_percent: number
progress_label: string progress_label: string
is_terminal: boolean is_terminal: boolean
can_cancel: boolean
can_retry: boolean
result_snapshot: Record<string, unknown> result_snapshot: Record<string, unknown>
error_message: string | null error_message: string | null
created_at: string created_at: string