feat: add generation trace and partial-ready workflow status
This commit is contained in:
@@ -10,6 +10,7 @@ from app.core.logging import get_logger
|
||||
from app.services.adapters import AdapterConfig, AdapterRegistry
|
||||
from app.services.adapters.text.models import StoryOutput
|
||||
from app.services.cost_tracker import cost_tracker
|
||||
from app.services.generation_jobs import record_generation_event
|
||||
from app.services.provider_cache import get_providers
|
||||
from app.services.provider_metrics import health_checker, metrics_collector
|
||||
from app.services.provider_policy import (
|
||||
@@ -22,6 +23,7 @@ from app.services.provider_policy import (
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.db.admin_models import Provider
|
||||
from app.db.models import GenerationJob
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -36,6 +38,58 @@ _round_robin_counters: dict[ProviderType, int] = {
|
||||
_latency_cache: dict[str, float] = {}
|
||||
|
||||
|
||||
def _safe_estimated_cost(adapter) -> float:
    """Return an adapter cost value that is safe to serialize in job events.

    Reads ``adapter.estimated_cost`` and coerces it to ``float``. Any failure
    while reading or converting the value (missing attribute, ``None``, a
    non-numeric value, a property that raises) yields ``0.0`` instead of
    propagating. Non-finite results (``nan`` / ``inf``) also yield ``0.0``
    because they have no valid JSON representation.

    Args:
        adapter: Object expected to expose an ``estimated_cost`` attribute.

    Returns:
        The cost as a finite ``float``, or ``0.0`` when it is unavailable,
        unconvertible, or non-finite.
    """
    import math

    try:
        cost = float(adapter.estimated_cost)
    except Exception:
        # Deliberate best-effort: the attribute may be a property that raises
        # arbitrary errors, and a cost lookup must not surface them here.
        return 0.0

    # float() happily returns nan/inf (e.g. from "nan" or 1e999); those would
    # break strict JSON encoders, so treat them as "unknown cost".
    return cost if math.isfinite(cost) else 0.0
|
||||
|
||||
|
||||
async def _record_provider_event_if_present(
    db: AsyncSession | None,
    *,
    job: "GenerationJob | None",
    event_type: str,
    status: str,
    provider_type: ProviderType,
    adapter_name: str,
    strategy: RoutingStrategy,
    provider_id: str | None = None,
    story_id: int | None = None,
    latency_ms: int | None = None,
    estimated_cost: float | None = None,
    error: str | None = None,
) -> None:
    """Forward one provider-call event to ``record_generation_event``.

    Does nothing unless both a database session and a generation job are
    supplied, so callers can invoke it unconditionally.

    Args:
        db: Database session handed to ``record_generation_event``; ``None``
            disables recording.
        job: Generation job the event is attached to; ``None`` disables
            recording.
        event_type: Event type string passed through unchanged.
        status: Status string passed through unchanged; also used in the
            message when ``error`` is ``None``.
        provider_type: Provider capability, stored under ``"capability"``.
        adapter_name: Adapter name, stored under ``"adapter"``.
        strategy: Routing strategy; its ``.value`` is stored in the metadata.
        provider_id: Optional provider identifier for the metadata.
        story_id: Optional story id passed through to the event.
        latency_ms: Optional call latency for the metadata.
        estimated_cost: Optional cost, stored under ``"estimated_cost_usd"``.
        error: Optional error text; when set, the message always reads
            "... failed." regardless of ``status``.
    """
    has_target = db is not None and job is not None
    if not has_target:
        return

    # The human-readable message is built before the metadata, mirroring the
    # keyword-argument evaluation order of the recording call.
    if error is not None:
        message = f"{provider_type} provider {adapter_name} failed."
    else:
        message = f"{provider_type} provider {adapter_name} {status}."

    details = {
        "capability": provider_type,
        "adapter": adapter_name,
        "provider_id": provider_id,
        "strategy": strategy.value,
        "latency_ms": latency_ms,
        "estimated_cost_usd": estimated_cost,
        "error": error,
    }

    await record_generation_event(
        db,
        job=job,
        story_id=story_id,
        event_type=event_type,
        status=status,
        message=message,
        metadata=details,
    )
|
||||
|
||||
|
||||
def _get_api_key(config_ref: str | None, adapter_name: str) -> str:
|
||||
"""根据 config_ref 或适配器名称获取 API Key。"""
|
||||
# 优先使用 config_ref
|
||||
@@ -228,6 +282,8 @@ async def _route_with_failover(
|
||||
strategy: RoutingStrategy = RoutingStrategy.PRIORITY,
|
||||
db: AsyncSession | None = None,
|
||||
user_id: str | None = None,
|
||||
generation_job: "GenerationJob | None" = None,
|
||||
story_id: int | None = None,
|
||||
**kwargs,
|
||||
) -> T:
|
||||
"""通用 provider failover 路由。
|
||||
@@ -237,6 +293,8 @@ async def _route_with_failover(
|
||||
strategy: 路由策略
|
||||
db: 数据库会话(可选,用于指标收集和熔断检查)
|
||||
user_id: 用户 ID(可选,用于成本追踪和预算检查)
|
||||
generation_job: 生成任务(可选,用于记录 provider 调用轨迹)
|
||||
story_id: 故事 ID(可选,用于关联 provider 事件)
|
||||
**kwargs: 传递给适配器的参数
|
||||
"""
|
||||
providers = await _get_providers_with_config(provider_type)
|
||||
@@ -274,7 +332,9 @@ async def _route_with_failover(
|
||||
errors.append(f"{name}: 适配器未注册")
|
||||
continue
|
||||
|
||||
provider_id = db_provider.id if db_provider else None
|
||||
provider_id = str(db_provider.id) if db_provider else None
|
||||
estimated_cost: float | None = None
|
||||
start_time: float | None = None
|
||||
|
||||
try:
|
||||
logger.debug(
|
||||
@@ -285,6 +345,20 @@ async def _route_with_failover(
|
||||
)
|
||||
|
||||
adapter = adapter_class(config)
|
||||
estimated_cost = _safe_estimated_cost(adapter)
|
||||
|
||||
await _record_provider_event_if_present(
|
||||
db,
|
||||
job=generation_job,
|
||||
story_id=story_id,
|
||||
event_type="provider_call_started",
|
||||
status="running",
|
||||
provider_type=provider_type,
|
||||
adapter_name=name,
|
||||
provider_id=provider_id,
|
||||
strategy=strategy,
|
||||
estimated_cost=estimated_cost,
|
||||
)
|
||||
|
||||
# 执行并计时
|
||||
start_time = time.time()
|
||||
@@ -301,7 +375,7 @@ async def _route_with_failover(
|
||||
provider_id=provider_id,
|
||||
success=True,
|
||||
latency_ms=latency_ms,
|
||||
cost_usd=adapter.estimated_cost,
|
||||
cost_usd=estimated_cost,
|
||||
)
|
||||
await health_checker.record_call_result(db, provider_id, success=True)
|
||||
|
||||
@@ -312,10 +386,24 @@ async def _route_with_failover(
|
||||
user_id=user_id,
|
||||
provider_name=name,
|
||||
capability=provider_type,
|
||||
estimated_cost=adapter.estimated_cost,
|
||||
estimated_cost=estimated_cost,
|
||||
provider_id=provider_id,
|
||||
)
|
||||
|
||||
await _record_provider_event_if_present(
|
||||
db,
|
||||
job=generation_job,
|
||||
story_id=story_id,
|
||||
event_type="provider_call_succeeded",
|
||||
status="succeeded",
|
||||
provider_type=provider_type,
|
||||
adapter_name=name,
|
||||
provider_id=provider_id,
|
||||
strategy=strategy,
|
||||
latency_ms=latency_ms,
|
||||
estimated_cost=estimated_cost,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"provider_success",
|
||||
provider_type=provider_type,
|
||||
@@ -326,6 +414,11 @@ async def _route_with_failover(
|
||||
|
||||
except Exception as exc:
|
||||
error_msg = str(exc)
|
||||
latency_ms = (
|
||||
int((time.time() - start_time) * 1000)
|
||||
if start_time is not None
|
||||
else None
|
||||
)
|
||||
logger.warning(
|
||||
"provider_failed",
|
||||
provider_type=provider_type,
|
||||
@@ -346,6 +439,21 @@ async def _route_with_failover(
|
||||
db, provider_id, success=False, error=error_msg
|
||||
)
|
||||
|
||||
await _record_provider_event_if_present(
|
||||
db,
|
||||
job=generation_job,
|
||||
story_id=story_id,
|
||||
event_type="provider_call_failed",
|
||||
status="failed",
|
||||
provider_type=provider_type,
|
||||
adapter_name=name,
|
||||
provider_id=provider_id,
|
||||
strategy=strategy,
|
||||
latency_ms=latency_ms,
|
||||
estimated_cost=estimated_cost,
|
||||
error=error_msg,
|
||||
)
|
||||
|
||||
raise ValueError(f"No {provider_type} provider succeeded. Errors: {' | '.join(errors)}")
|
||||
|
||||
|
||||
@@ -356,12 +464,16 @@ async def generate_story_content(
|
||||
memory_context: str | None = None,
|
||||
strategy: RoutingStrategy = RoutingStrategy.PRIORITY,
|
||||
db: AsyncSession | None = None,
|
||||
user_id: str | None = None,
|
||||
generation_job: "GenerationJob | None" = None,
|
||||
) -> StoryOutput:
|
||||
"""生成或润色故事,支持 failover。"""
|
||||
return await _route_with_failover(
|
||||
"text",
|
||||
strategy=strategy,
|
||||
db=db,
|
||||
user_id=user_id,
|
||||
generation_job=generation_job,
|
||||
input_type=input_type,
|
||||
data=data,
|
||||
education_theme=education_theme,
|
||||
@@ -373,19 +485,42 @@ async def generate_image(
|
||||
prompt: str,
|
||||
strategy: RoutingStrategy = RoutingStrategy.PRIORITY,
|
||||
db: AsyncSession | None = None,
|
||||
user_id: str | None = None,
|
||||
generation_job: "GenerationJob | None" = None,
|
||||
story_id: int | None = None,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
"""生成图片,返回 URL,支持 failover。"""
|
||||
return await _route_with_failover("image", strategy=strategy, db=db, prompt=prompt, **kwargs)
|
||||
return await _route_with_failover(
|
||||
"image",
|
||||
strategy=strategy,
|
||||
db=db,
|
||||
user_id=user_id,
|
||||
generation_job=generation_job,
|
||||
story_id=story_id,
|
||||
prompt=prompt,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
async def text_to_speech(
    text: str,
    strategy: RoutingStrategy = RoutingStrategy.PRIORITY,
    db: AsyncSession | None = None,
    user_id: str | None = None,
    generation_job: "GenerationJob | None" = None,
    story_id: int | None = None,
) -> bytes:
    """Convert text to speech and return MP3 bytes, with provider failover.

    Delegates to ``_route_with_failover`` for the ``"tts"`` capability and
    forwards every tracing/accounting argument. A leftover legacy call that
    passed only ``strategy``, ``db`` and ``text`` is removed: it returned
    first, leaving the full call unreachable and ``user_id``,
    ``generation_job`` and ``story_id`` unused.

    Args:
        text: Text to synthesize.
        strategy: Routing strategy used to order providers.
        db: Optional database session (used by the router for metrics
            collection and circuit-breaker checks).
        user_id: Optional user id (used by the router for cost tracking and
            budget checks).
        generation_job: Optional generation job on which provider call
            events are recorded.
        story_id: Optional story id used to associate provider events.

    Returns:
        The synthesized audio as MP3 bytes.

    Raises:
        ValueError: Propagated from ``_route_with_failover`` when no provider
            succeeds.
    """
    return await _route_with_failover(
        "tts",
        strategy=strategy,
        db=db,
        user_id=user_id,
        generation_job=generation_job,
        story_id=story_id,
        text=text,
    )
|
||||
|
||||
|
||||
async def generate_storybook(
|
||||
@@ -395,6 +530,8 @@ async def generate_storybook(
|
||||
memory_context: str | None = None,
|
||||
strategy: RoutingStrategy = RoutingStrategy.PRIORITY,
|
||||
db: AsyncSession | None = None,
|
||||
user_id: str | None = None,
|
||||
generation_job: "GenerationJob | None" = None,
|
||||
):
|
||||
"""生成分页故事书,支持 failover。"""
|
||||
from app.services.adapters.storybook.primary import Storybook
|
||||
@@ -403,6 +540,8 @@ async def generate_storybook(
|
||||
"storybook",
|
||||
strategy=strategy,
|
||||
db=db,
|
||||
user_id=user_id,
|
||||
generation_job=generation_job,
|
||||
keywords=keywords,
|
||||
page_count=page_count,
|
||||
education_theme=education_theme,
|
||||
|
||||
Reference in New Issue
Block a user