Initial commit: clean project structure

- Backend: FastAPI + SQLAlchemy + Celery (Python 3.11+)
- Frontend: Vue 3 + TypeScript + Pinia + Tailwind
- Admin Frontend: separate Vue 3 app for management
- Docker Compose: orchestration of 9 services
- Specs: design prototypes, memory system PRD, product roadmap

Cleanup performed:
- Removed temporary debug scripts from backend root
- Removed deprecated admin_app.py (embedded UI)
- Removed duplicate docs from admin-frontend
- Updated .gitignore for Vite cache and egg-info
2026-01-20 18:20:03 +08:00
commit e9d7f8832a
241 changed files with 33070 additions and 0 deletions
--- a/backend/app/services/adapters/tts/__init__.py
+++ b/backend/app/services/adapters/tts/__init__.py
@@ -0,0 +1,5 @@
+"""TTS speech-synthesis adapters (imported here for their registration side effect)."""
+
+from app.services.adapters.tts import edge_tts as _tts_edge_tts_adapter  # noqa: F401
+from app.services.adapters.tts import elevenlabs as _tts_elevenlabs_adapter  # noqa: F401
+from app.services.adapters.tts import minimax as _tts_minimax_adapter  # noqa: F401
--- a/backend/app/services/adapters/tts/edge_tts.py
+++ b/backend/app/services/adapters/tts/edge_tts.py
@@ -0,0 +1,66 @@
+"""EdgeTTS 免费语音生成适配器。"""
+
+import time
+
+import edge_tts
+
+from app.core.logging import get_logger
+from app.services.adapters.base import BaseAdapter
+from app.services.adapters.registry import AdapterRegistry
+
+logger = get_logger(__name__)
+
+# 默认中文女声 (晓晓)
+DEFAULT_VOICE = "zh-CN-XiaoxiaoNeural"
+
+
+@AdapterRegistry.register("tts", "edge_tts")
+class EdgeTTSAdapter(BaseAdapter[bytes]):
+    """Free text-to-speech adapter backed by the ``edge_tts`` library.
+
+    No API key is read by this adapter.
+    """
+
+    adapter_type = "tts"
+    adapter_name = "edge_tts"
+
+    async def execute(self, text: str, **kwargs) -> bytes:
+        """Synthesize ``text`` and return the concatenated audio bytes."""
+        # Voice precedence: ``voice`` kwarg, then config.model, then the default.
+        voice = kwargs.get("voice") or self.config.model or DEFAULT_VOICE
+
+        # perf_counter is monotonic, so clock adjustments cannot skew the timing.
+        start_time = time.perf_counter()
+        logger.info("edge_tts_generate_start", text_length=len(text), voice=voice)
+
+        # Stream the audio straight into memory; no temporary file is involved.
+        communicate = edge_tts.Communicate(text, voice)
+
+        # bytearray grows in place; ``bytes +=`` would re-copy the buffer per chunk.
+        audio_buffer = bytearray()
+        async for chunk in communicate.stream():
+            # Keep only chunks tagged as audio; other chunk types are skipped.
+            if chunk["type"] == "audio":
+                audio_buffer += chunk["data"]
+        audio_data = bytes(audio_buffer)
+
+        elapsed = time.perf_counter() - start_time
+        logger.info(
+            "edge_tts_generate_success",
+            elapsed_seconds=round(elapsed, 2),
+            audio_size_bytes=len(audio_data),
+        )
+        return audio_data
+
+    async def health_check(self) -> bool:
+        """Return True if synthesizing a short sample succeeds, else False."""
+        try:
+            await self.execute("Hi")  # tiny real request as a connectivity probe
+            return True
+        except Exception:
+            return False
+
+    @property
+    def estimated_cost(self) -> float:
+        """Estimated cost; always 0.0 because this backend is free."""
+        return 0.0
--- a/backend/app/services/adapters/tts/elevenlabs.py
+++ b/backend/app/services/adapters/tts/elevenlabs.py
@@ -0,0 +1,104 @@
+"""ElevenLabs TTS 语音合成适配器。"""
+
+import time
+
+import httpx
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+)
+
+from app.core.logging import get_logger
+from app.services.adapters.base import AdapterConfig, BaseAdapter
+from app.services.adapters.registry import AdapterRegistry
+
+logger = get_logger(__name__)
+
+ELEVENLABS_API_BASE = "https://api.elevenlabs.io/v1"
+DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM"  # Rachel
+
+
+@AdapterRegistry.register("tts", "elevenlabs")
+class ElevenLabsTtsAdapter(BaseAdapter[bytes]):
+    """Text-to-speech adapter for the ElevenLabs HTTP API (requests MPEG audio)."""
+
+    adapter_type = "tts"
+    adapter_name = "elevenlabs"
+
+    def __init__(self, config: AdapterConfig):
+        super().__init__(config)
+        # A configured base URL takes precedence over the public endpoint.
+        self.api_base = config.api_base or ELEVENLABS_API_BASE
+
+    async def execute(self, text: str, **kwargs) -> bytes:
+        """Convert ``text`` to speech and return the response body bytes.
+
+        Recognized kwargs: ``voice_id``, ``model``, ``stability`` and
+        ``similarity_boost``; each one has a fallback default.
+        """
+        start_time = time.time()
+        logger.info("elevenlabs_tts_start", text_length=len(text))
+
+        voice = kwargs.get("voice_id") or DEFAULT_VOICE_ID
+        # Model precedence: call argument, adapter config, then the default.
+        chosen_model = kwargs.get("model") or self.config.model or "eleven_multilingual_v2"
+        voice_settings = {
+            "stability": kwargs.get("stability", 0.5),
+            "similarity_boost": kwargs.get("similarity_boost", 0.75),
+        }
+        request_body = {
+            "text": text,
+            "model_id": chosen_model,
+            "voice_settings": voice_settings,
+        }
+
+        endpoint = f"{self.api_base}/text-to-speech/{voice}"
+        audio_bytes = await self._call_api(endpoint, request_body)
+
+        logger.info(
+            "elevenlabs_tts_success",
+            elapsed_seconds=round(time.time() - start_time, 2),
+            audio_size_bytes=len(audio_bytes),
+        )
+        return audio_bytes
+
+    async def health_check(self) -> bool:
+        """Report whether ``GET {api_base}/voices`` answers with HTTP 200."""
+        try:
+            async with httpx.AsyncClient(timeout=10) as client:
+                response = await client.get(
+                    f"{self.api_base}/voices",
+                    headers={"xi-api-key": self.config.api_key},
+                )
+        except Exception:
+            # Any failure while probing counts as unhealthy.
+            return False
+        else:
+            return response.status_code == 200
+
+    @property
+    def estimated_cost(self) -> float:
+        """Estimated cost per thousand characters (USD)."""
+        return 0.03
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, min=1, max=10),
+        retry=retry_if_exception_type((httpx.HTTPError, httpx.TimeoutException)),
+        reraise=True,
+    )
+    async def _call_api(self, url: str, payload: dict) -> bytes:
+        """POST ``payload`` as JSON to ``url``; retried per the decorator above."""
+        timeout_seconds = self.config.timeout_ms / 1000
+        request_headers = {
+            "xi-api-key": self.config.api_key,
+            "Content-Type": "application/json",
+            "Accept": "audio/mpeg",
+        }
+
+        async with httpx.AsyncClient(timeout=timeout_seconds) as client:
+            response = await client.post(url, json=payload, headers=request_headers)
+            response.raise_for_status()
+            return response.content
--- a/backend/app/services/adapters/tts/minimax.py
+++ b/backend/app/services/adapters/tts/minimax.py
@@ -0,0 +1,149 @@
+"""MiniMax 语音生成适配器 (T2A V2)。"""
+
+import time
+
+import httpx
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+)
+
+from app.core.config import settings
+from app.core.logging import get_logger
+from app.services.adapters.base import AdapterConfig, BaseAdapter
+from app.services.adapters.registry import AdapterRegistry
+
+logger = get_logger(__name__)
+
+# MiniMax API 配置
+DEFAULT_API_URL = "https://api.minimaxi.com/v1/t2a_v2"
+DEFAULT_MODEL = "speech-2.6-turbo"
+
+@AdapterRegistry.register("tts", "minimax")
+class MiniMaxTTSAdapter(BaseAdapter[bytes]):
+    """Text-to-speech adapter for the MiniMax T2A V2 HTTP endpoint.
+
+    Configuration read by this class:
+    - api_key: sent as a Bearer token on every request.
+    - minimax_group_id (settings): optional; appended as ``GroupId`` when set.
+    """
+
+    adapter_type = "tts"
+    adapter_name = "minimax"
+
+    def __init__(self, config: AdapterConfig):
+        super().__init__(config)
+        self.api_url = DEFAULT_API_URL
+
+    async def execute(
+        self,
+        text: str,
+        voice_id: str | None = None,
+        model: str | None = None,
+        speed: float | None = None,
+        vol: float | None = None,
+        pitch: int | None = None,
+        emotion: str | None = None,
+        **kwargs,
+    ) -> bytes:
+        """Synthesize ``text`` and return the decoded audio bytes.
+
+        Options resolve as: explicit argument, then ``extra_config``
+        (``config.model`` for the model), then a built-in default.
+
+        Raises:
+            ValueError: non-zero API status, missing audio, or invalid hex.
+        """
+        model = model or self.config.model or DEFAULT_MODEL
+        cfg = self.config.extra_config or {}
+
+        voice_id = voice_id or cfg.get("voice_id") or "male-qn-qingse"
+        speed = speed if speed is not None else (cfg.get("speed") or 1.0)
+        vol = vol if vol is not None else (cfg.get("vol") or 1.0)
+        pitch = pitch if pitch is not None else (cfg.get("pitch") or 0)
+        emotion = emotion or cfg.get("emotion")
+        group_id = kwargs.get("group_id") or settings.minimax_group_id
+
+        url = f"{self.api_url}?GroupId={group_id}" if group_id else self.api_url
+
+        payload = {
+            "model": model,
+            "text": text,
+            "stream": False,
+            "voice_setting": {
+                "voice_id": voice_id,
+                "speed": speed,
+                "vol": vol,
+                "pitch": pitch,
+            },
+            "audio_setting": {
+                "sample_rate": 32000,
+                "bitrate": 128000,
+                "format": "mp3",
+                "channel": 1,
+            },
+        }
+        if emotion:
+            payload["voice_setting"]["emotion"] = emotion
+
+        # perf_counter is monotonic, so clock adjustments cannot skew the timing.
+        start_time = time.perf_counter()
+        logger.info("minimax_generate_start", text_length=len(text), model=model)
+
+        result = await self._call_api(url, payload)
+
+        # ``or {}`` also covers an explicit JSON null, not just a missing key.
+        base_resp = result.get("base_resp") or {}
+        if base_resp.get("status_code") != 0:
+            error_msg = base_resp.get("status_msg", "未知错误")
+            raise ValueError(f"MiniMax API 错误: {error_msg}")
+
+        # The audio arrives hex-encoded in ``data.audio``.
+        hex_audio = (result.get("data") or {}).get("audio")
+        if not hex_audio:
+            raise ValueError("API 响应中未找到音频数据 (data.audio)")
+
+        try:
+            audio_bytes = bytes.fromhex(hex_audio)
+        except ValueError as exc:
+            # Chain the cause so the original decode error stays in the traceback.
+            raise ValueError("MiniMax 返回的音频数据不是有效的 Hex 字符串") from exc
+
+        elapsed = time.perf_counter() - start_time
+        logger.info(
+            "minimax_generate_success",
+            elapsed_seconds=round(elapsed, 2),
+            audio_size_bytes=len(audio_bytes),
+        )
+        return audio_bytes
+
+    async def health_check(self) -> bool:
+        """Return True if synthesizing a short sample succeeds, else False."""
+        try:
+            await self.execute("Hi")  # issues a real (very short) API request
+            return True
+        except Exception:
+            return False
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, min=1, max=10),
+        retry=retry_if_exception_type((httpx.HTTPError, httpx.TimeoutException)),
+        reraise=True,
+    )
+    async def _call_api(self, url: str, payload: dict) -> dict:
+        """POST ``payload`` as JSON to ``url`` (retried) and return the parsed JSON."""
+        timeout = self.config.timeout_ms / 1000  # httpx expects seconds
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            response = await client.post(
+                url,
+                json=payload,
+                headers={
+                    "Authorization": f"Bearer {self.config.api_key}",
+                    "Content-Type": "application/json",
+                },
+            )
+            response.raise_for_status()
+            return response.json()