diff --git a/admin-frontend/src/views/AdminProviders.vue b/admin-frontend/src/views/AdminProviders.vue
index dca49aa..6127f63 100644
--- a/admin-frontend/src/views/AdminProviders.vue
+++ b/admin-frontend/src/views/AdminProviders.vue
@@ -29,6 +29,248 @@
+
+
+
+
+
当前环境 Provider 运营摘要
+
+ 跨用户 / 当前环境
+
+
+
+ 这里展示的是当前部署环境内所有生成任务留下的 Provider 调用轨迹,便于运营和排障。
+ 跨环境对比仍需要后续独立汇聚层。
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
活跃用户
+
+ {{ analytics?.user_count ?? 0 }}
+
+
+
+
总调用
+
+ {{ analytics?.total_calls ?? 0 }}
+
+
+
+
成功率
+
+ {{ providerSuccessRate ?? '--' }}%
+
+
+
+
预估成本
+
+ {{ formatCost(analytics?.estimated_cost_usd) }}
+
+
+
+
+
+
+ 正在更新运营摘要...
+
+
+ {{ analyticsError }}
+
+
+
+
+
覆盖故事
+
{{ analytics.story_count }}
+
+
+
覆盖任务
+
{{ analytics.job_count }}
+
+
+
平均耗时
+
{{ formatLatency(analytics.avg_latency_ms) }}
+
+
+
配置中 Provider
+
{{ enabledProviderCount }}/{{ providers.length }}
+
+
+
+
+
+
+
+
Provider 调用明细
+
按能力和 adapter 聚合的当前环境视图
+
+
{{ analyticsProviderRows.length }} 个组合
+
+
+
+
+
+
+ {{ formatCapability(row.capability) }}
+
+ {{ row.adapter }}
+
+
+
+
调用
+
{{ row.call_count }}
+
+
+
成功率
+
+ {{ getSuccessRate(row.success_count, row.call_count) ?? '--' }}%
+
+
+
+
耗时
+
{{ formatLatency(row.avg_latency_ms) }}
+
+
+
成本
+
{{ formatCost(row.estimated_cost_usd) }}
+
+
+
+ 当前筛选条件下还没有 Provider 调用样本。
+
+
+
+
+
+
+
+
跨用户分布
+
按调用量排序,帮助快速识别主要使用者
+
+
+
+
+
+
{{ row.user_id }}
+
+ {{ row.story_count }} 个故事 · {{ row.job_count }} 个任务
+
+
+
+
{{ row.call_count }} 次
+
{{ formatCost(row.estimated_cost_usd) }}
+
+
+
+
+ 当前还没有跨用户样本。
+
+
+
+
+
+
+
最近失败原因
+
先看最常见原因,再决定是否调整路由或优先级
+
+
+
+ {{ reason.reason }}
+
+ {{ reason.count }} 次
+
+
+
+ 当前窗口内还没有失败调用。
+
+
+
+
+
+
+
+
@@ -275,7 +517,7 @@
diff --git a/backend/app/api/admin_providers.py b/backend/app/api/admin_providers.py
index 663fa45..1a1c7cb 100644
--- a/backend/app/api/admin_providers.py
+++ b/backend/app/api/admin_providers.py
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, ConfigDict, Field
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -8,6 +8,7 @@ from app.db.admin_models import Provider
from app.db.database import get_db
from app.services.adapters.registry import AdapterRegistry
from app.services.cost_tracker import cost_tracker
+from app.services.generation_jobs import get_admin_provider_analytics
from app.services.provider_policy import DEFAULT_PROVIDERS, list_capability_policies
from app.services.secret_service import SecretService
@@ -56,6 +57,48 @@ class ProviderResponse(BaseModel):
model_config = ConfigDict(from_attributes=True)
+
+class ProviderAnalyticsBucket(BaseModel):  # One `by_provider` row of ProviderAnalyticsResponse; tests show one row per capability/adapter pair.
+ capability: str
+ adapter: str
+ call_count: int
+ success_count: int
+ failure_count: int
+ avg_latency_ms: float | None = None  # Optional; presumably None when no latency was recorded — confirm in _aggregate_provider_events.
+ estimated_cost_usd: float
+
+
+class ProviderAnalyticsUserBucket(BaseModel):  # One `by_user` row, as serialized by get_admin_provider_analytics.
+ user_id: str
+ call_count: int
+ success_count: int
+ failure_count: int
+ job_count: int  # Number of distinct jobs among this user's counted events.
+ story_count: int  # Number of distinct non-null story ids among this user's counted events.
+ estimated_cost_usd: float  # Summed over succeeded calls only and rounded to 6 decimals by the service.
+
+
+class ProviderAnalyticsFailureReason(BaseModel):  # One `failure_reasons` row: a reason string and its occurrence count.
+ reason: str  # Presumably the failed event's `error` metadata (tests use "timeout") — confirm in _aggregate_provider_events.
+ count: int
+
+
+class ProviderAnalyticsResponse(BaseModel):  # Response schema of GET /providers/analytics.
+ scope: str  # The service currently returns the fixed value "current_environment".
+ window_days: int | None = None  # Echo of the `days` query parameter; None when no window was requested.
+ capability: str | None = None  # Echo of the `capability` filter; None when unfiltered.
+ total_calls: int
+ successful_calls: int
+ failed_calls: int
+ avg_latency_ms: float | None = None
+ estimated_cost_usd: float
+ user_count: int  # Distinct users among events matching the capability filter.
+ job_count: int  # Distinct jobs among events matching the capability filter.
+ story_count: int  # Distinct non-null story ids among events matching the capability filter.
+ by_provider: list[ProviderAnalyticsBucket]
+ by_user: list[ProviderAnalyticsUserBucket]  # Sorted by call_count desc, then cost desc, then user_id asc.
+ failure_reasons: list[ProviderAnalyticsFailureReason]
+
@router.get("/providers/adapters")
async def list_available_adapters():
"""获取所有可用的适配器类型 (定义的类)。"""
@@ -74,6 +117,20 @@ async def list_provider_capabilities():
return list_capability_policies()
+@router.get("/providers/analytics", response_model=ProviderAnalyticsResponse)
+async def get_provider_analytics(  # NOTE(review): no auth dependency is declared on this route — confirm admin_guard is enforced at router level.
+ days: int | None = Query(default=None, ge=1, le=365),  # Optional look-back window, validated to 1..365.
+ capability: str | None = Query(default=None),  # Optional capability filter; the service compares it by equality.
+ db: AsyncSession = Depends(get_db),
+):
+ """获取当前环境跨用户的 Provider 运营摘要。"""  # "Return the cross-user provider operations summary for the current environment." Kept verbatim: FastAPI publishes the docstring as the OpenAPI description.
+ return await get_admin_provider_analytics(
+ db,
+ days=days,
+ capability=capability,
+ )
+
+
@router.get("/providers", response_model=list[ProviderResponse])
async def list_providers(db: AsyncSession = Depends(get_db)):
result = await db.execute(select(Provider))
diff --git a/backend/app/services/generation_jobs.py b/backend/app/services/generation_jobs.py
index ace1975..2555492 100644
--- a/backend/app/services/generation_jobs.py
+++ b/backend/app/services/generation_jobs.py
@@ -606,23 +606,37 @@ def _aggregate_provider_events(
}
+def _event_matches_capability(  # Return True when `event` passes the optional capability filter.
+ event: GenerationJobEvent,
+ capability: str | None = None,  # None disables filtering, so every event matches.
+) -> bool:
+ event_capability = str((event.event_metadata or {}).get("capability") or "unknown")  # Missing or falsy capability metadata is treated as "unknown".
+ return capability is None or event_capability == capability
+
+
def _provider_events_query(
*,
- user_id: str,
+ user_id: str | None = None,
story_id: int | None = None,
days: int | None = None,
):
query = (
- select(GenerationJobEvent)
+ select(
+ GenerationJobEvent,
+ GenerationJob.user_id,
+ GenerationJob.story_id,
+ )
.join(GenerationJob, GenerationJobEvent.job_id == GenerationJob.id)
.where(
- GenerationJob.user_id == user_id,
GenerationJobEvent.event_type.in_(
["provider_call_succeeded", "provider_call_failed"]
),
)
)
+ if user_id is not None:
+ query = query.where(GenerationJob.user_id == user_id)
+
if story_id is not None:
query = query.where(GenerationJob.story_id == story_id)
@@ -681,17 +695,12 @@ async def get_user_provider_analytics(
filtered_event_job_ids = {
event.job_id
for event in events
- if capability is None
- or str((event.event_metadata or {}).get("capability") or "unknown") == capability
+ if _event_matches_capability(event, capability)
}
filtered_story_ids = {
event.story_id
for event in events
- if event.story_id is not None
- and (
- capability is None
- or str((event.event_metadata or {}).get("capability") or "unknown") == capability
- )
+ if event.story_id is not None and _event_matches_capability(event, capability)
}
return {
@@ -703,6 +712,87 @@ async def get_user_provider_analytics(
}
+async def get_admin_provider_analytics(
+ db: AsyncSession,
+ *,
+ days: int | None = None,
+ capability: str | None = None,
+) -> dict[str, Any]:
+ """Aggregate provider telemetry across every user in the current environment."""
+
+ rows = (await db.execute(_provider_events_query(days=days))).all()  # No user_id/story_id is passed, so the query is not narrowed to one user or story; every row is loaded into memory.
+ events = [event for event, _, _ in rows]  # Unfiltered events; the capability filter is handed to _aggregate_provider_events in the return below.
+ filtered_rows = [  # Rows that pass the capability filter; these drive the per-user buckets and the distinct counts.
+ (event, user_id, story_id)
+ for event, user_id, story_id in rows
+ if _event_matches_capability(event, capability)
+ ]
+
+ by_user: dict[str, dict[str, Any]] = {}
+ filtered_job_ids = {event.job_id for event, _, _ in filtered_rows}
+ filtered_story_ids = {
+ story_id for _, _, story_id in filtered_rows if story_id is not None
+ }
+ filtered_user_ids = {user_id for _, user_id, _ in filtered_rows}
+
+ for event, user_id, story_id in filtered_rows:
+ bucket = by_user.setdefault(
+ user_id,
+ {
+ "user_id": user_id,
+ "call_count": 0,
+ "success_count": 0,
+ "failure_count": 0,
+ "estimated_cost_usd": 0.0,
+ "job_ids": set(),  # Sets are working state only; they are serialized as counts below.
+ "story_ids": set(),
+ },
+ )
+ bucket["call_count"] += 1
+ bucket["job_ids"].add(event.job_id)
+ if story_id is not None:
+ bucket["story_ids"].add(story_id)
+
+ if event.event_type == "provider_call_succeeded":
+ bucket["success_count"] += 1
+ bucket["estimated_cost_usd"] += (  # Cost is accumulated for succeeded calls only.
+ _as_float((event.event_metadata or {}).get("estimated_cost_usd")) or 0.0
+ )
+ else:  # The query only selects succeeded/failed events, so anything else here is a failure.
+ bucket["failure_count"] += 1
+
+ serialized_users = [
+ {
+ "user_id": user_id,
+ "call_count": bucket["call_count"],
+ "success_count": bucket["success_count"],
+ "failure_count": bucket["failure_count"],
+ "job_count": len(bucket["job_ids"]),
+ "story_count": len(bucket["story_ids"]),
+ "estimated_cost_usd": round(bucket["estimated_cost_usd"], 6),
+ }
+ for user_id, bucket in by_user.items()
+ ]
+ serialized_users.sort(  # Order: most calls first, then highest cost, then user_id ascending.
+ key=lambda item: (
+ -int(item["call_count"]),
+ -float(item["estimated_cost_usd"]),
+ str(item["user_id"]),
+ )
+ )
+
+ return {
+ "scope": "current_environment",
+ "window_days": days,
+ "capability": capability,
+ **_aggregate_provider_events(events, capability=capability),  # Presumably supplies total_calls, by_provider, failure_reasons, etc. — confirm in _aggregate_provider_events.
+ "user_count": len(filtered_user_ids),
+ "job_count": len(filtered_job_ids),
+ "story_count": len(filtered_story_ids),
+ "by_user": serialized_users,
+ }
+
+
async def get_user_generation_ops_summary(
db: AsyncSession,
*,
diff --git a/backend/tests/test_admin_providers.py b/backend/tests/test_admin_providers.py
new file mode 100644
index 0000000..bcbb755
--- /dev/null
+++ b/backend/tests/test_admin_providers.py
@@ -0,0 +1,285 @@
+from datetime import datetime, timedelta, timezone
+
+from fastapi import FastAPI
+from httpx import ASGITransport, AsyncClient
+
+from app.api import admin_providers
+from app.core.admin_auth import admin_guard
+from app.db.database import get_db
+from app.db.models import Story, User
+from app.services.generation_jobs import create_generation_job, record_generation_event
+
+
+def _build_admin_test_app(db_session) -> FastAPI:  # Build a minimal app that mounts only the admin providers router under /admin.
+ app = FastAPI()
+ app.include_router(admin_providers.router, prefix="/admin")
+
+ async def override_get_db():
+ yield db_session
+
+ async def override_admin_guard():
+ return True
+
+ app.dependency_overrides[get_db] = override_get_db  # Handlers receive the test's own session.
+ app.dependency_overrides[admin_guard] = override_admin_guard  # Replaces admin_guard with a stub that returns True.
+ return app
+
+
+async def _create_story(  # Insert and return a Story fixture owned by `user_id`.
+ db_session,
+ *,
+ user_id: str,
+ title: str,
+ mode: str = "generated",
+) -> Story:
+ story = Story(
+ user_id=user_id,
+ title=title,
+ story_text="测试内容",
+ cover_prompt="A gentle moonlit forest",
+ mode=mode,
+ generation_status="partial_ready",
+ text_status="ready",
+ image_status="not_requested",
+ audio_status="not_requested",
+ )
+ db_session.add(story)
+ await db_session.commit()
+ await db_session.refresh(story)  # Reload after commit before returning; the tests then read story.id.
+ return story
+
+
+async def test_admin_provider_analytics_aggregate_across_users(db_session, test_user):  # Two users, three provider events, no filters.
+ second_user = User(
+ id="github:67890",
+ name="Another User",
+ avatar_url="https://example.com/avatar-2.png",
+ provider="github",
+ )
+ db_session.add(second_user)
+ await db_session.commit()
+
+ first_story = await _create_story(db_session, user_id=test_user.id, title="第一位用户的故事")
+ second_story = await _create_story(
+ db_session,
+ user_id=second_user.id,
+ title="第二位用户的故事",
+ )
+
+ image_job = await create_generation_job(  # First user's job: one succeeded and one failed image call.
+ db_session,
+ user_id=test_user.id,
+ output_mode="asset_retry",
+ input_type="image",
+ request_payload={"assets": ["image"]},
+ story_id=first_story.id,
+ )
+ await record_generation_event(
+ db_session,
+ job=image_job,
+ story_id=first_story.id,
+ event_type="provider_call_succeeded",
+ status="succeeded",
+ metadata={
+ "capability": "image",
+ "adapter": "demo",
+ "strategy": "priority",
+ "latency_ms": 42,
+ "estimated_cost_usd": 0.01,
+ },
+ )
+ await record_generation_event(
+ db_session,
+ job=image_job,
+ story_id=first_story.id,
+ event_type="provider_call_failed",
+ status="failed",
+ metadata={
+ "capability": "image",
+ "adapter": "cqtai",
+ "strategy": "priority",
+ "latency_ms": 120,
+ "error": "timeout",
+ },
+ )
+
+ audio_job = await create_generation_job(  # Second user's job: one succeeded tts call.
+ db_session,
+ user_id=second_user.id,
+ output_mode="asset_retry",
+ input_type="audio",
+ request_payload={"assets": ["audio"]},
+ story_id=second_story.id,
+ )
+ await record_generation_event(
+ db_session,
+ job=audio_job,
+ story_id=second_story.id,
+ event_type="provider_call_succeeded",
+ status="succeeded",
+ metadata={
+ "capability": "tts",
+ "adapter": "edge_tts",
+ "strategy": "priority",
+ "latency_ms": 18,
+ "estimated_cost_usd": 0.003,
+ },
+ )
+
+ admin_app = _build_admin_test_app(db_session)
+ transport = ASGITransport(app=admin_app)
+
+ async with AsyncClient(transport=transport, base_url="http://test") as client:
+ response = await client.get("/admin/providers/analytics")
+
+ assert response.status_code == 200
+ data = response.json()
+ assert data["scope"] == "current_environment"
+ assert data["user_count"] == 2
+ assert data["job_count"] == 2
+ assert data["story_count"] == 2
+ assert data["total_calls"] == 3
+ assert data["successful_calls"] == 2
+ assert data["failed_calls"] == 1
+ assert data["avg_latency_ms"] == 60.0  # (42 + 120 + 18) / 3
+ assert data["estimated_cost_usd"] == 0.013  # 0.01 + 0.003
+ assert data["failure_reasons"] == [{"reason": "timeout", "count": 1}]
+ assert data["by_provider"] == [  # Expected order looks sorted by (capability, adapter) — confirm in _aggregate_provider_events.
+ {
+ "capability": "image",
+ "adapter": "cqtai",
+ "call_count": 1,
+ "success_count": 0,
+ "failure_count": 1,
+ "avg_latency_ms": 120.0,
+ "estimated_cost_usd": 0.0,
+ },
+ {
+ "capability": "image",
+ "adapter": "demo",
+ "call_count": 1,
+ "success_count": 1,
+ "failure_count": 0,
+ "avg_latency_ms": 42.0,
+ "estimated_cost_usd": 0.01,
+ },
+ {
+ "capability": "tts",
+ "adapter": "edge_tts",
+ "call_count": 1,
+ "success_count": 1,
+ "failure_count": 0,
+ "avg_latency_ms": 18.0,
+ "estimated_cost_usd": 0.003,
+ },
+ ]
+ assert data["by_user"] == [  # The user with more calls is expected first.
+ {
+ "user_id": test_user.id,
+ "call_count": 2,
+ "success_count": 1,
+ "failure_count": 1,
+ "job_count": 1,
+ "story_count": 1,
+ "estimated_cost_usd": 0.01,
+ },
+ {
+ "user_id": second_user.id,
+ "call_count": 1,
+ "success_count": 1,
+ "failure_count": 0,
+ "job_count": 1,
+ "story_count": 1,
+ "estimated_cost_usd": 0.003,
+ },
+ ]
+
+
+async def test_admin_provider_analytics_support_days_and_capability_filters(  # Exercises the `days` and `capability` query filters separately.
+ db_session,
+ test_user,
+):
+ second_user = User(
+ id="google:22222",
+ name="Filter User",
+ avatar_url="https://example.com/avatar-3.png",
+ provider="google",
+ )
+ db_session.add(second_user)
+ await db_session.commit()
+
+ first_story = await _create_story(db_session, user_id=test_user.id, title="旧事件故事")
+ second_story = await _create_story(db_session, user_id=second_user.id, title="最近事件故事")
+
+ image_job = await create_generation_job(
+ db_session,
+ user_id=test_user.id,
+ output_mode="asset_retry",
+ input_type="image",
+ request_payload={"assets": ["image"]},
+ story_id=first_story.id,
+ )
+ old_event = await record_generation_event(
+ db_session,
+ job=image_job,
+ story_id=first_story.id,
+ event_type="provider_call_failed",
+ status="failed",
+ metadata={
+ "capability": "image",
+ "adapter": "cqtai",
+ "strategy": "priority",
+ "latency_ms": 120,
+ "error": "timeout",
+ },
+ )
+ old_event.created_at = datetime.now(timezone.utc) - timedelta(days=10)  # Backdate so the event falls outside the 7-day window requested below.
+ await db_session.commit()
+
+ audio_job = await create_generation_job(
+ db_session,
+ user_id=second_user.id,
+ output_mode="asset_retry",
+ input_type="audio",
+ request_payload={"assets": ["audio"]},
+ story_id=second_story.id,
+ )
+ await record_generation_event(
+ db_session,
+ job=audio_job,
+ story_id=second_story.id,
+ event_type="provider_call_succeeded",
+ status="succeeded",
+ metadata={
+ "capability": "tts",
+ "adapter": "edge_tts",
+ "strategy": "priority",
+ "latency_ms": 18,
+ "estimated_cost_usd": 0.003,
+ },
+ )
+
+ admin_app = _build_admin_test_app(db_session)
+ transport = ASGITransport(app=admin_app)
+
+ async with AsyncClient(transport=transport, base_url="http://test") as client:
+ response = await client.get("/admin/providers/analytics?days=7")  # Only the recent tts success is expected to remain.
+ assert response.status_code == 200
+ data = response.json()
+ assert data["window_days"] == 7
+ assert data["total_calls"] == 1
+ assert data["user_count"] == 1
+ assert data["job_count"] == 1
+ assert data["story_count"] == 1
+ assert data["failure_reasons"] == []
+
+ response = await client.get("/admin/providers/analytics?capability=image")  # No time window here, so the backdated image failure is included.
+ assert response.status_code == 200
+ data = response.json()
+ assert data["capability"] == "image"
+ assert data["total_calls"] == 1
+ assert data["failed_calls"] == 1
+ assert data["user_count"] == 1
+ assert data["job_count"] == 1
+ assert data["story_count"] == 1
+ assert data["failure_reasons"] == [{"reason": "timeout", "count": 1}]
diff --git a/docs/planning/demo-checklist.md b/docs/planning/demo-checklist.md
index 1f6d1e1..f31a0f4 100644
--- a/docs/planning/demo-checklist.md
+++ b/docs/planning/demo-checklist.md
@@ -126,7 +126,7 @@ DreamWeaver 是面向 3-8 岁亲子场景的个性化 AI 绘本与陪伴式讲
### 2:20 - 3:00 取舍与下一步
-求职版优先稳定闭环和可解释性,不做支付、多租户和复杂监控。现在 job/event 已能查询 workflow、资产补全、provider 调用轨迹和聚合指标,用户端和管理端也能展示生成轨迹与跨故事 Provider 运营摘要;统一生成也已经迁移到后台 worker,下一步是补取消/重试队列。
+求职版优先稳定闭环和可解释性,不做支付、多租户和复杂监控。现在 job/event 已能查询 workflow、资产补全、provider 调用轨迹和聚合指标,统一生成已迁移到后台 worker,取消/重试队列也已打通;用户端可看跨故事运营摘要,管理端可看当前环境跨用户 Provider dashboard。下一步应补跨环境汇聚、断点续跑和更完整监控。
---
diff --git a/docs/planning/demo-package.md b/docs/planning/demo-package.md
index b2877d9..f4cd747 100644
--- a/docs/planning/demo-package.md
+++ b/docs/planning/demo-package.md
@@ -51,7 +51,7 @@ SMOKE_AUDIO=1 ./scripts/demo_smoke.sh
- **AI 不确定性处理**:主内容和资产拆开,图片/音频失败不阻塞阅读。
- **Provider 产品化**:用户看到稳定能力,系统内部用 Capability / Provider / Adapter / Routing Policy 管供应链。
- **可观测性**:generation job/event 让生成过程、失败恢复和 Provider 成本可解释。
-- **可继续生产化**:统一生成已经迁移到 worker,前端轮询和任务事件模型也已打通,下一步是补取消/重试队列和更完整监控。
+- **可继续生产化**:统一生成已迁移到 worker,前端轮询、任务事件模型、取消/重试队列和管理台当前环境 dashboard 也已打通,下一步是补跨环境汇聚、断点续跑和更完整监控。
---
@@ -63,4 +63,4 @@ SMOKE_AUDIO=1 ./scripts/demo_smoke.sh
| 图片生成失败 | 展示 `degraded_completed` 与资源重试 |
| Docker 冷启动慢 | 演示前先跑 smoke 并保持容器运行 |
| Provider 追问过深 | 回到 Capability / Provider / Adapter / Routing Policy 四层解释 |
-| 生产化追问 | 说明下一步是取消/重试队列、监控告警、密钥治理和 Provider analytics 扩展 |
+| 生产化追问 | 说明下一步是跨环境 Provider 汇聚、断点续跑、监控告警和密钥治理 |
diff --git a/docs/planning/interview-pitch.md b/docs/planning/interview-pitch.md
index 65d2c67..e5c8bbb 100644
--- a/docs/planning/interview-pitch.md
+++ b/docs/planning/interview-pitch.md
@@ -83,4 +83,4 @@ AI 生成产品最大的问题不是“能不能调模型”,而是结果不
### 这个项目下一步怎么上线?
-我已经把当前轻量 job/event 模型迁移到后台 worker,并打通了前端进度轮询;下一步会补取消/重试队列,再继续扩展跨时间窗口和跨用户维度的 provider 运营分析。生产上线前还需要补真实用户鉴权配置、密钥管理、监控告警和部署策略。
+我已经把当前轻量 job/event 模型迁移到后台 worker,并打通了前端进度轮询、取消/重试队列和管理台当前环境运营视图;下一步会补跨环境 Provider 汇聚、断点续跑和更完整监控。生产上线前还需要补真实用户鉴权配置、密钥管理和部署策略。
diff --git a/docs/planning/week-2-to-4-execution-backlog.md b/docs/planning/week-2-to-4-execution-backlog.md
index 34cc3e0..fe0f9f6 100644
--- a/docs/planning/week-2-to-4-execution-backlog.md
+++ b/docs/planning/week-2-to-4-execution-backlog.md
@@ -72,6 +72,7 @@ Week 2 已完成演示闭环、统一生成工作流、generation job/event、
| W4-09 | Workflow | 卡住任务自动收敛 | `GENERATION_JOB_STALE_MINUTES` + Celery beat stale job maintenance | P1 | Done |
| W4-10 | Workflow | 防止重复资产任务 | 运行中故事拒绝重复封面/音频/资产重试请求 | P1 | Done |
| W4-11 | Workflow | 生成任务取消与重新排队 | 取消已提交任务,失败/取消任务可重新排队 | P1 | Done |
+| W4-12 | Ops | 管理台当前环境跨用户 Provider dashboard | `GET /admin/providers/analytics` + admin console 运营摘要面板 | P1 | Done |
---
diff --git a/docs/planning/week-4-sprint-review.md b/docs/planning/week-4-sprint-review.md
index 0002e23..f15ec3a 100644
--- a/docs/planning/week-4-sprint-review.md
+++ b/docs/planning/week-4-sprint-review.md
@@ -40,6 +40,7 @@ DreamWeaver 已经具备求职演示所需的完整闭环:
- Provider failover 和聚合指标
- 跨故事 Provider analytics
- 任务运行概览、最近失败摘要与卡住任务收敛
+- 当前环境跨用户 Provider dashboard
- 前端生成轨迹和自动轮询形态
---
@@ -61,7 +62,7 @@ DreamWeaver 已经具备求职演示所需的完整闭环:
| Priority | Task | Why |
| --- | --- | --- |
-| P0 | 跨用户 / 跨环境 Provider dashboard | 当前已支持单用户摘要,后续要支持运营视角 |
+| P0 | 跨环境 Provider dashboard | 当前环境跨用户摘要已落地,后续需要多部署汇聚视角 |
| P1 | 监控告警与结构化 dashboard | 目前已有故事库级概览,后续要接入更完整观测体系 |
| P1 | 断点续跑与更细粒度任务控制 | 让取消、重试和 worker 恢复更稳 |
| P2 | 更细粒度叙事风格与音色策略 | 扩展体验,但不影响当前求职版主线 |
diff --git a/docs/product/unified-generation-workflow-prd.md b/docs/product/unified-generation-workflow-prd.md
index c754728..8c98940 100644
--- a/docs/product/unified-generation-workflow-prd.md
+++ b/docs/product/unified-generation-workflow-prd.md
@@ -77,16 +77,20 @@ DreamWeaver 当前同时支持普通故事生成、完整故事生成和绘本
- `POST /api/generations/jobs/{job_id}/cancel`
- `POST /api/generations/jobs/{job_id}/retry`
- 创建弹窗与生成轨迹都可触发取消或重新排队
+- 管理台已补当前环境的跨用户 Provider dashboard:
+ - `GET /admin/providers/analytics`
+ - 支持 `days` / `capability` 筛选
+ - 可查看跨用户调用量、成功率、平均耗时、预估成本、失败原因和 Top 用户分布
### Remaining Production Work
- 普通故事、完整生成、绘本生成已有统一外部入口,内部 workflow 仍可继续减少兼容层分支
- 统一资产重试入口已覆盖普通故事封面、绘本缺失插图和故事音频,后续可继续扩展更细的资产级审计
-- 断点续跑、跨用户/跨环境 Provider 分析,以及更细粒度的任务控制策略仍属于后续生产化增强
+- 断点续跑、跨环境 Provider 汇聚视图,以及更细粒度的任务控制策略仍属于后续生产化增强
### What This Means
-这份 PRD 仍然保留目标态设计,但主干能力已经可在当前代码中演示。当前最适合的继续方式,是在已落地的 worker 化与任务控制基础上,把当前首版运营摘要扩展为可筛选、可对比的分析视角,并逐步补断点续跑和更完整监控,而不是继续扩大功能范围。
+这份 PRD 仍然保留目标态设计,但主干能力已经可在当前代码中演示。当前最适合的继续方式,是在已落地的 worker 化、任务控制和当前环境运营 dashboard 基础上,继续补断点续跑、跨环境汇聚和更完整监控,而不是继续扩大功能范围。
---
@@ -97,7 +101,7 @@ DreamWeaver 当前同时支持普通故事生成、完整故事生成和绘本
DreamWeaver 当前存在以下工作流层面问题:
1. **生成入口已建立,内部路径正在收束**
- 当前前端已切到 `/api/generations`,旧的 `/api/stories/generate`、`/api/stories/generate/full`、`/api/storybook/generate` 仍作为兼容入口保留。service 内部已抽取上下文准备、主记录保存、封面补全、绘本插图补全和音频补全 helper,并用 `AssetCompletionResult` 表达资产补全结果。generation job/event 已落库并可查询,Provider 调用轨迹、单故事聚合指标和跨故事运营摘要也已进入用户端与管理端展示;统一生成请求现在已经交给后台 worker 执行。下一步重点是把取消/重试队列也接到这套事件模型上。
+ 当前前端已切到 `/api/generations`,旧的 `/api/stories/generate`、`/api/stories/generate/full`、`/api/storybook/generate` 仍作为兼容入口保留。service 内部已抽取上下文准备、主记录保存、封面补全、绘本插图补全和音频补全 helper,并用 `AssetCompletionResult` 表达资产补全结果。generation job/event 已落库并可查询,Provider 调用轨迹、单故事聚合指标、跨故事运营摘要和管理台跨用户 dashboard 都已进入前端展示;统一生成请求现在已经交给后台 worker 执行。下一步重点应转向断点续跑和跨环境汇聚,而不是再补一套新的入口。
2. **保存与资产补全过程正在统一**
文本故事和绘本已拥有更清晰的主记录保存 helper;普通故事封面、绘本缺失插图、故事音频生成/缓存已共用各自的 asset completion helper。服务层已经能表达资产任务结果,并会把统一入口、资产重试、绘本逐页插图和音频生成的关键节点写入 job event。
diff --git a/docs/technical/architecture.md b/docs/technical/architecture.md
index f030498..0ef96cb 100644
--- a/docs/technical/architecture.md
+++ b/docs/technical/architecture.md
@@ -100,7 +100,7 @@ flowchart LR
当前仍是求职版 MVP,不引入复杂工作流引擎。下一步生产化优先级:
-1. 补齐生成任务取消与重新排队能力,减少误触和重复消耗。
-2. 基于现有 job 查询和前端轮询继续扩展真实异步进度与任务控制。
-3. 扩展 Provider analytics 的时间窗口、失败原因和跨用户维度。
-4. 继续补充部署、监控告警和密钥治理策略。
+1. 补跨环境 Provider 汇聚视图,避免每个部署环境各自成孤岛。
+2. 基于现有 job 查询和前端轮询继续扩展断点续跑与更细粒度任务控制。
+3. 在当前环境 dashboard 基础上继续扩展失败原因、监控告警和结构化观测能力。
+4. 继续补充部署与密钥治理策略。
diff --git a/docs/technical/generation-job-state.md b/docs/technical/generation-job-state.md
index f2a7130..13c2689 100644
--- a/docs/technical/generation-job-state.md
+++ b/docs/technical/generation-job-state.md
@@ -61,7 +61,7 @@ job 响应会返回 `progress_percent`、`progress_label` 和 `is_terminal`,
当前已有两层记录,未来可以继续扩展字段和事件颗粒度:
- 继续复用现有 job 查询和前端轮询进度条,为取消请求、重新排队和长任务通知提供统一入口。
-- 将当前跨故事 provider 指标扩展为跨用户、跨环境和更细颗粒度的失败原因维度分析。
+- 当前环境的跨用户 provider dashboard 已在 admin 端落地,下一步应补跨环境汇聚和更细颗粒度的失败原因维度分析。
## 面试表达