"""Deterministic evaluation helpers for generated child-facing content.""" from dataclasses import dataclass, field from enum import StrEnum from typing import Any from app.services.adapters.storybook.primary import Storybook from app.services.adapters.text.models import StoryOutput from app.services.harness.quality_gates import ( QualityGateError, validate_story_output, validate_storybook_output, ) class EvaluationDimension(StrEnum): """Stable dimensions used by harness evaluations.""" STRUCTURE = "structure" SAFETY = "safety" AGE_FIT = "age_fit" EDUCATIONAL_VALUE = "educational_value" READABILITY = "readability" @dataclass(frozen=True) class EvaluationScore: """One scored evaluation dimension.""" dimension: EvaluationDimension score: float reason: str def to_metadata(self) -> dict[str, Any]: """Return a JSON-safe metadata payload.""" return { "dimension": self.dimension.value, "score": self.score, "reason": self.reason, } @dataclass(frozen=True) class EvaluationResult: """Deterministic evaluation result for one generated artifact.""" overall_score: float passed: bool blocking: bool scores: tuple[EvaluationScore, ...] gate_error: QualityGateError | None = None warnings: tuple[str, ...] = field(default_factory=tuple) def to_metadata(self) -> dict[str, Any]: """Return a JSON-safe metadata payload.""" metadata: dict[str, Any] = { "overall_score": self.overall_score, "passed": self.passed, "blocking": self.blocking, "scores": [score.to_metadata() for score in self.scores], "warnings": list(self.warnings), } if self.gate_error is not None: metadata["quality_gate"] = self.gate_error.to_metadata() return metadata def _clamp_score(value: float) -> float: return max(0.0, min(1.0, round(value, 2))) def _story_text_readability_score(story_text: str) -> float: """Score text length with a conservative 3-8 age readability heuristic.""" normalized_length = len(story_text.strip()) if normalized_length < 30: return 0.45 if normalized_length > 2500: return 0.72 if normalized_length > 1800: return 0.84 return 0.96 def _educational_value_score(story_text: str, education_theme: str | None) -> float: if not education_theme: return 0.82 return 0.96 if education_theme.strip() in story_text else 0.88 def _storybook_readability_score(page_texts: list[str]) -> float: if not page_texts: return 0.0 page_lengths = [len(text.strip()) for text in page_texts] if any(length < 8 for length in page_lengths): return 0.62 if any(length > 320 for length in page_lengths): return 0.78 if any(length > 220 for length in page_lengths): return 0.88 return 0.96 def _storybook_educational_value_score( page_texts: list[str], education_theme: str | None, ) -> float: if not education_theme: return 0.82 combined_text = " ".join(page_texts) return 0.96 if education_theme.strip() in combined_text else 0.88 def evaluate_story_output( output: StoryOutput, *, education_theme: str | None = None, minimum_score: float = 0.7, ) -> EvaluationResult: """Evaluate a generated text story before persistence.""" try: validate_story_output(output) except QualityGateError as exc: scores = ( EvaluationScore( dimension=EvaluationDimension.STRUCTURE, score=0.0, reason="故事结构未通过质量门。", ), EvaluationScore( dimension=EvaluationDimension.SAFETY, score=0.0, reason="内容未通过儿童安全或结构完整性检查。", ), ) return EvaluationResult( overall_score=0.0, passed=False, blocking=True, scores=scores, gate_error=exc, ) readability_score = _story_text_readability_score(output.story_text) educational_score = _educational_value_score(output.story_text, education_theme) warnings: list[str] = [] if readability_score < 0.8: warnings.append("故事正文长度可能不适合 3-8 岁儿童的完整阅读体验。") scores = ( EvaluationScore( dimension=EvaluationDimension.STRUCTURE, score=1.0, reason="标题、正文和封面提示词完整。", ), EvaluationScore( dimension=EvaluationDimension.SAFETY, score=1.0, reason="未命中确定性儿童安全风险词。", ), EvaluationScore( dimension=EvaluationDimension.AGE_FIT, score=readability_score, reason="根据正文长度估算低龄儿童阅读适配度。", ), EvaluationScore( dimension=EvaluationDimension.EDUCATIONAL_VALUE, score=educational_score, reason="根据教育主题是否清晰融入正文估算。", ), EvaluationScore( dimension=EvaluationDimension.READABILITY, score=readability_score, reason="根据正文长度估算朗读和亲子共读流畅度。", ), ) overall_score = _clamp_score(sum(score.score for score in scores) / len(scores)) return EvaluationResult( overall_score=overall_score, passed=overall_score >= minimum_score, blocking=overall_score < minimum_score, scores=scores, warnings=tuple(warnings), ) def evaluate_storybook_output( output: Storybook, *, education_theme: str | None = None, minimum_score: float = 0.7, ) -> EvaluationResult: """Evaluate generated storybook structure before persistence.""" try: validate_storybook_output(output) except QualityGateError as exc: scores = ( EvaluationScore( dimension=EvaluationDimension.STRUCTURE, score=0.0, reason="绘本结构未通过质量门。", ), EvaluationScore( dimension=EvaluationDimension.SAFETY, score=0.0, reason="绘本内容未通过儿童安全或结构完整性检查。", ), ) return EvaluationResult( overall_score=0.0, passed=False, blocking=True, scores=scores, gate_error=exc, ) page_texts = [page.text for page in output.pages] readability_score = _storybook_readability_score(page_texts) educational_score = _storybook_educational_value_score(page_texts, education_theme) warnings: list[str] = [] if readability_score < 0.8: warnings.append("绘本分页正文长度可能不适合 3-8 岁儿童的翻页阅读体验。") scores = ( EvaluationScore( dimension=EvaluationDimension.STRUCTURE, score=1.0, reason="绘本标题、分页和页码结构完整。", ), EvaluationScore( dimension=EvaluationDimension.SAFETY, score=1.0, reason="未命中确定性儿童安全风险词。", ), EvaluationScore( dimension=EvaluationDimension.AGE_FIT, score=readability_score, reason="根据每页正文长度估算低龄儿童翻页阅读适配度。", ), EvaluationScore( dimension=EvaluationDimension.EDUCATIONAL_VALUE, score=educational_score, reason="根据教育主题是否清晰融入分页正文估算。", ), EvaluationScore( dimension=EvaluationDimension.READABILITY, score=readability_score, reason="根据分页正文长度估算亲子共读流畅度。", ), ) overall_score = _clamp_score(sum(score.score for score in scores) / len(scores)) return EvaluationResult( overall_score=overall_score, passed=overall_score >= minimum_score, blocking=overall_score < minimum_score, scores=scores, warnings=tuple(warnings), )