From 3805c18622454666d583bc7450f40d10cd254bef Mon Sep 17 00:00:00 2001 From: torin Date: Fri, 24 Apr 2026 17:58:49 +0800 Subject: [PATCH] feat: add ASR provider support for voice co-creation --- README.md | 7 +- admin-frontend/src/views/AdminProviders.vue | 14 ++- backend/.env.example | 5 + backend/app/api/admin_providers.py | 2 +- backend/app/core/config.py | 5 +- backend/app/db/admin_models.py | 4 +- backend/app/services/adapters/__init__.py | 6 +- backend/app/services/adapters/asr/__init__.py | 1 + backend/app/services/adapters/asr/demo.py | 57 +++++++++ backend/app/services/adapters/asr/models.py | 11 ++ backend/app/services/adapters/asr/openai.py | 76 +++++++++++ backend/app/services/provider_policy.py | 13 +- backend/app/services/provider_router.py | 48 ++++++- backend/app/services/voice_session_service.py | 2 + .../services/voice_transcription_service.py | 119 ++++-------------- backend/tests/test_provider_router.py | 22 +++- docs/planning/demo-checklist.md | 36 +++++- docs/planning/demo-validation-log.md | 42 +++++++ .../voice-co-creation-mode-incremental-prd.md | 51 ++++++-- .../voice-co-creation-phase-a-tech-spec.md | 4 +- frontend/src/views/AdminProviders.vue | 2 +- scripts/demo_smoke.sh | 70 ++++++++++- 22 files changed, 471 insertions(+), 126 deletions(-) create mode 100644 backend/app/services/adapters/asr/__init__.py create mode 100644 backend/app/services/adapters/asr/demo.py create mode 100644 backend/app/services/adapters/asr/models.py create mode 100644 backend/app/services/adapters/asr/openai.py diff --git a/README.md b/README.md index 90a259c..5e9c18d 100644 --- a/README.md +++ b/README.md @@ -63,11 +63,14 @@ docker compose ps docker compose logs -f backend ./scripts/demo_smoke.sh SMOKE_AUDIO=1 ./scripts/demo_smoke.sh +SMOKE_VOICE=1 ./scripts/demo_smoke.sh docker compose down docker compose down -v ``` -`scripts/demo_smoke.sh` 会检查健康状态、本地登录、统一生成后台任务、主记录落库、资产重试、故事列表和 Provider 能力分层。默认跳过 TTS;演示前需要验证语音链路时使用 `SMOKE_AUDIO=1`。 +`scripts/demo_smoke.sh` 会检查健康状态、本地登录、统一生成后台任务、主记录落库、资产重试、故事列表和 Provider 能力分层。默认跳过 TTS 和语音共创;演示前需要验证朗读链路时使用 `SMOKE_AUDIO=1`,需要验证 Voice Studio Alpha 时使用 `SMOKE_VOICE=1`。 + +语音共创的 ASR 能力已纳入 Provider 分层。默认 `ASR_PROVIDERS=["demo"]` 会使用 `transcript_hint` 或文本上传作为本地演示转写;需要真实转写时可设置 `ASR_PROVIDERS=["openai_asr", "demo"]` 并配置 `OPENAI_API_KEY`。 ## 手动开发 @@ -142,7 +145,7 @@ npm run build | GET | `/api/stories/{story_id}` | 故事详情 | | DELETE | `/api/stories/{story_id}` | 删除故事 | | GET/POST/PUT/DELETE | `/admin/providers` | Provider 管理,需开启管理后台 | -| GET | `/admin/providers/capabilities` | Provider 能力分层说明,需开启管理后台 | +| GET | `/admin/providers/capabilities` | Provider 能力分层说明(text/image/tts/storybook/asr),需开启管理后台 | ## 文档入口 diff --git a/admin-frontend/src/views/AdminProviders.vue b/admin-frontend/src/views/AdminProviders.vue index a2db1e1..e4e7bbb 100644 --- a/admin-frontend/src/views/AdminProviders.vue +++ b/admin-frontend/src/views/AdminProviders.vue @@ -109,6 +109,14 @@ > 绘本 + @@ -316,7 +324,7 @@