From fe360fad3c8a369d06d823bbf9c7746f30ecb5e6 Mon Sep 17 00:00:00 2001 From: houhuan Date: Mon, 11 May 2026 13:34:03 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E7=94=A8=20.NET=20System.Speech=20?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0=20Win11=20=E6=9C=AC=E5=9C=B0=20TTS=20?= =?UTF-8?q?=E8=AF=AD=E9=9F=B3=E6=92=AD=E6=94=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 8 +-- app/services/tts.py | 138 +++++++++++++++++++------------------------- config.py | 13 ++--- requirements.txt | 4 +- 4 files changed, 67 insertions(+), 96 deletions(-) diff --git a/.env.example b/.env.example index d4f8ad3..c6dbf99 100644 --- a/.env.example +++ b/.env.example @@ -2,12 +2,10 @@ WECOM_BOT_ID=your_bot_id_here WECOM_BOT_SECRET=your_bot_secret_here -# Xiaomi TTS -XIAOMI_USER_ID=1136458602 -XIAOMI_TOKEN_PATH=.mi.token -XIAOMI_SPEAKER_DID=3ba2c1e8-d8cb-45c5-b88a-15624e7a02f3 +# Windows 11 Local TTS (.NET System.Speech via PowerShell) +TTS_VOICE_NAME= +TTS_RATE=0 # TTS Behavior TTS_ENABLED=true TTS_MAX_TEXT_LENGTH=500 -TTS_TIMEOUT_SECONDS=15 diff --git a/app/services/tts.py b/app/services/tts.py index 72ce1e6..14566ec 100644 --- a/app/services/tts.py +++ b/app/services/tts.py @@ -1,76 +1,42 @@ -import asyncio -import json -import logging -import threading -from pathlib import Path -from typing import Tuple, Any, Dict - -from aiohttp import ClientSession -from miservice import MiAccount, MiNAService, MiTokenStore - +import logging, subprocess +from typing import Tuple, Any, Dict, List import config logger = logging.getLogger(__name__) -class SafeTokenStore(MiTokenStore): - """Wraps MiTokenStore to never lose passToken on auth failure.""" - - def __init__(self, token_path): - super().__init__(token_path) - self._saved_pass_token = "" - self._load_backup() - - def _load_backup(self): - path = Path(self.token_path) - backup = Path(str(path) + ".backup") - if backup.exists(): - try: - data = json.loads(backup.read_text("utf-8")) - 
def _run_ps(commands, timeout=60, env=None):
    """Run PowerShell statements in a non-interactive ``powershell.exe``.

    Args:
        commands: Iterable of PowerShell source fragments; they are joined
            with ``"; "`` into a single ``-Command`` script.
        timeout: Seconds to wait before the child process is abandoned.
        env: Optional mapping of extra environment variables for the child.
            Callers use this to hand data to the script (``$env:NAME``)
            without splicing it into the script source.

    Returns:
        ``(returncode, output)``. ``output`` is the stripped stdout; when the
        process exits non-zero without writing to stdout, the stripped stderr
        is returned instead so the caller has something to report.
        ``(-1, "timeout")`` on timeout and ``(-1, str(exc))`` when the
        process cannot be started at all.
    """
    # Function-scope import keeps this block self-contained.
    import os

    script = "; ".join(commands)

    child_env = None  # None -> inherit the parent environment unchanged
    if env:
        child_env = dict(os.environ)
        child_env.update(env)

    try:
        proc = subprocess.run(
            ["powershell.exe", "-NoProfile", "-NonInteractive", "-Command", script],
            capture_output=True,
            text=True,
            errors="replace",  # never fail on undecodable console output
            timeout=timeout,
            env=child_env,
        )
    except subprocess.TimeoutExpired:
        return -1, "timeout"
    except Exception as exc:
        # Best-effort boundary: e.g. powershell.exe is missing on this host.
        return -1, str(exc)

    output = proc.stdout.strip()
    if proc.returncode != 0 and not output:
        # PowerShell reports errors on stderr; surface them to the caller.
        output = proc.stderr.strip()
    return proc.returncode, output


def list_voices() -> List[Dict[str, str]]:
    """Return the speech voices reported by System.Speech.

    Each entry is a dict with the keys ``name``, ``description``,
    ``culture``, ``gender`` and ``age``. The list is empty when PowerShell
    cannot be run or prints no ``VOICE:`` lines.
    """
    cmds = [
        "Add-Type -AssemblyName System.Speech",
        "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
        "foreach ($v in $s.GetInstalledVoices()) {",
        " $i = $v.VoiceInfo",
        ' Write-Host ("VOICE:" + $i.Name + "|" + $i.Description + "|" + $i.Culture + "|" + $i.Gender + "|" + $i.Age)',
        "}",
        "$s.Dispose()",
    ]
    _code, out = _run_ps(cmds)

    prefix = "VOICE:"
    fields = ("name", "description", "culture", "gender", "age")
    voices = []
    for line in out.splitlines():
        if not line.startswith(prefix):
            continue
        parts = [part.strip() for part in line[len(prefix):].split("|")]
        if len(parts) >= len(fields):
            # zip() stops at the last field, ignoring any extra columns.
            voices.append(dict(zip(fields, parts)))
    return voices


def speak(text: str) -> Tuple[bool, Dict[str, Any]]:
    """Speak *text* aloud with the local Windows speech synthesizer.

    The text is truncated to ``config.TTS_MAX_TEXT_LENGTH`` characters and
    stripped. The text and the optional voice name reach PowerShell through
    environment variables rather than being pasted into the script, so
    characters such as ``"``, ``$`` or backticks in a message are spoken,
    never interpreted.

    Returns:
        ``(True, {"skipped": True})`` when TTS is disabled,
        ``(True, {"spoken": True})`` on success, otherwise
        ``(False, {"error": ...})``.
    """
    if not config.TTS_ENABLED:
        logger.info("TTS disabled, skipping: %s", text)
        return True, {"skipped": True}

    text = (text or "")[:config.TTS_MAX_TEXT_LENGTH].strip()
    if not text:
        return False, {"error": "empty text after truncation"}

    voice = (config.TTS_VOICE_NAME or "").strip()
    # SpeechSynthesizer.Rate only accepts -10..10; clamp instead of failing.
    rate = max(-10, min(10, int(config.TTS_RATE)))

    cmds = [
        "Add-Type -AssemblyName System.Speech",
        "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
    ]
    if voice:
        # Substring match: pick the first installed voice whose name
        # contains the configured value.
        cmds += [
            "foreach ($v in $s.GetInstalledVoices()) {",
            ' if ($v.VoiceInfo.Name -like ("*" + $env:TTS_PS_VOICE + "*"))'
            " { $s.SelectVoice($v.VoiceInfo.Name); break }",
            "}",
        ]
    cmds += [
        "$s.Rate = " + str(rate),
        "$s.Volume = 100",
        "$s.Speak($env:TTS_PS_TEXT)",
        "$s.Dispose()",
    ]

    code, out = _run_ps(cmds, env={"TTS_PS_TEXT": text, "TTS_PS_VOICE": voice})
    if code != 0:
        logger.error("TTS failed (exit %s): %s", code, out)
        return False, {"error": f"TTS failed: {out}"}
    return True, {"spoken": True}
os +import os from pathlib import Path from dotenv import load_dotenv @@ -29,15 +29,10 @@ def _env_int(key: str, default: int) -> int: WECOM_BOT_ID = _env("WECOM_BOT_ID") WECOM_BOT_SECRET = _env("WECOM_BOT_SECRET") -# Xiaomi TTS -XIAOMI_USER_ID = _env("XIAOMI_USER_ID", "1136458602") -XIAOMI_TOKEN_PATH = _env( - "XIAOMI_TOKEN_PATH", - str(Path(__file__).resolve().parent / ".mi.token"), -) -XIAOMI_SPEAKER_DID = _env("XIAOMI_SPEAKER_DID", "3ba2c1e8-d8cb-45c5-b88a-15624e7a02f3") +# Windows Local TTS +TTS_VOICE_NAME = _env("TTS_VOICE_NAME", "") # empty = system default voice +TTS_RATE = _env_int("TTS_RATE", 0) # SAPI rate: -10 (slowest) to 10 (fastest), default 0 # TTS TTS_ENABLED = _env_bool("TTS_ENABLED", True) TTS_MAX_TEXT_LENGTH = _env_int("TTS_MAX_TEXT_LENGTH", 500) -TTS_TIMEOUT_SECONDS = _env_int("TTS_TIMEOUT_SECONDS", 15) diff --git a/requirements.txt b/requirements.txt index 76bf105..807bba9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -websockets>=13.0 +websockets>=13.0 python-dotenv>=1.0.0 -miservice_fork>=2.9.0 +pywin32>=311 aiohttp>=3.9.0 pytest>=8.0.0 pytest-asyncio>=0.23.0