改用 .NET System.Speech 实现 Win11 本地 TTS 语音播放

This commit is contained in:
2026-05-11 13:34:03 +08:00
parent ca462e290a
commit fe360fad3c
4 changed files with 67 additions and 96 deletions
+3 -5
View File
@@ -2,12 +2,10 @@
WECOM_BOT_ID=your_bot_id_here WECOM_BOT_ID=your_bot_id_here
WECOM_BOT_SECRET=your_bot_secret_here WECOM_BOT_SECRET=your_bot_secret_here
# Xiaomi TTS # Windows 11 Local TTS (.NET System.Speech via PowerShell)
XIAOMI_USER_ID=1136458602 TTS_VOICE_NAME=
XIAOMI_TOKEN_PATH=.mi.token TTS_RATE=0
XIAOMI_SPEAKER_DID=3ba2c1e8-d8cb-45c5-b88a-15624e7a02f3
# TTS Behavior # TTS Behavior
TTS_ENABLED=true TTS_ENABLED=true
TTS_MAX_TEXT_LENGTH=500 TTS_MAX_TEXT_LENGTH=500
TTS_TIMEOUT_SECONDS=15
+55 -77
View File
@@ -1,76 +1,42 @@
import asyncio import logging, subprocess
import json from typing import Tuple, Any, Dict, List
import logging
import threading
from pathlib import Path
from typing import Tuple, Any, Dict
from aiohttp import ClientSession
from miservice import MiAccount, MiNAService, MiTokenStore
import config import config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class SafeTokenStore(MiTokenStore): def _run_ps(commands, timeout=60):
"""Wraps MiTokenStore to never lose passToken on auth failure.""" script = "; ".join(commands)
def __init__(self, token_path):
super().__init__(token_path)
self._saved_pass_token = ""
self._load_backup()
def _load_backup(self):
path = Path(self.token_path)
backup = Path(str(path) + ".backup")
if backup.exists():
try: try:
data = json.loads(backup.read_text("utf-8")) p = subprocess.run(
self._saved_pass_token = data.get("passToken", "") ["powershell.exe", "-NoProfile", "-NonInteractive", "-Command", script],
except Exception: capture_output=True, text=True, timeout=timeout)
pass return p.returncode, p.stdout.strip()
except subprocess.TimeoutExpired:
def _save_backup(self, token): return -1, "timeout"
path = Path(self.token_path)
backup = Path(str(path) + ".backup")
try:
backup.write_text(json.dumps(token, ensure_ascii=False, indent=2), encoding="utf-8")
except Exception:
pass
def save_token(self, token=None):
if token and token.get("passToken"):
self._saved_pass_token = token["passToken"]
self._save_backup(token)
elif token is None and self._saved_pass_token:
# miservice is trying to delete token after auth failure
# Don't let it — restore from backup
logger.warning("miservice tried to wipe token, restoring passToken...")
return
super().save_token(token)
def _run_async_in_thread(coro, timeout: float = 15.0):
result = None
error = None
def _target():
nonlocal result, error
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
result = loop.run_until_complete(coro)
except Exception as e: except Exception as e:
error = e return -1, str(e)
finally:
loop.close()
t = threading.Thread(target=_target)
t.start() def list_voices() -> List[Dict[str, str]]:
t.join(timeout=timeout) cmds = [
if error: "Add-Type -AssemblyName System.Speech",
raise error "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
"foreach ($v in $s.GetInstalledVoices()) {",
" $i = $v.VoiceInfo",
' Write-Host ("VOICE:" + $i.Name + "|" + $i.Description + "|" + $i.Culture + "|" + $i.Gender + "|" + $i.Age)',
"}",
"$s.Dispose()",
]
code, out = _run_ps(cmds)
result = []
for line in out.splitlines():
if line.startswith("VOICE:"):
parts = line[6:].strip().split("|")
if len(parts) >= 5:
result.append({"name": parts[0].strip(), "description": parts[1].strip(),
"culture": parts[2].strip(), "gender": parts[3].strip(),
"age": parts[4].strip()})
return result return result
@@ -83,19 +49,31 @@ def speak(text: str) -> Tuple[bool, Dict[str, Any]]:
if not text: if not text:
return False, {"error": "empty text after truncation"} return False, {"error": "empty text after truncation"}
async def _tts(): safe = text.replace(chr(34), chr(34) + chr(34))
token_store = SafeTokenStore(config.XIAOMI_TOKEN_PATH) vname = (config.TTS_VOICE_NAME or "").replace(chr(34), chr(34) + chr(34))
async with ClientSession() as session:
account = MiAccount( cmds = [
session, config.XIAOMI_USER_ID, None, token_store "Add-Type -AssemblyName System.Speech",
) "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
mina = MiNAService(account) ]
return await mina.text_to_speech(config.XIAOMI_SPEAKER_DID, text) if vname:
cmds += [
"foreach ($v in $s.GetInstalledVoices()) {",
' if ($v.VoiceInfo.Name -like "*' + vname + '*") { $s.SelectVoice($v.VoiceInfo.Name); break }',
"}",
]
cmds += [
"$s.Rate = " + str(config.TTS_RATE),
"$s.Volume = 100",
'$s.Speak("' + safe + '")',
"$s.Dispose()",
]
try: try:
result = _run_async_in_thread(_tts(), timeout=config.TTS_TIMEOUT_SECONDS) code, out = _run_ps(cmds)
ok = isinstance(result, dict) and result.get("code") == 0 if code != 0:
return ok, result or {} return False, {"error": f"TTS failed: {out}"}
return True, {"spoken": True}
except Exception as e: except Exception as e:
logger.exception("TTS call failed") logger.exception("TTS failed")
return False, {"error": str(e)} return False, {"error": str(e)}
+4 -9
View File
@@ -1,4 +1,4 @@
import os import os
from pathlib import Path from pathlib import Path
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -29,15 +29,10 @@ def _env_int(key: str, default: int) -> int:
WECOM_BOT_ID = _env("WECOM_BOT_ID") WECOM_BOT_ID = _env("WECOM_BOT_ID")
WECOM_BOT_SECRET = _env("WECOM_BOT_SECRET") WECOM_BOT_SECRET = _env("WECOM_BOT_SECRET")
# Xiaomi TTS # Windows Local TTS
XIAOMI_USER_ID = _env("XIAOMI_USER_ID", "1136458602") TTS_VOICE_NAME = _env("TTS_VOICE_NAME", "") # empty = system default voice
XIAOMI_TOKEN_PATH = _env( TTS_RATE = _env_int("TTS_RATE", 0) # SAPI rate: -10 (slowest) to 10 (fastest), default 0
"XIAOMI_TOKEN_PATH",
str(Path(__file__).resolve().parent / ".mi.token"),
)
XIAOMI_SPEAKER_DID = _env("XIAOMI_SPEAKER_DID", "3ba2c1e8-d8cb-45c5-b88a-15624e7a02f3")
# TTS # TTS
TTS_ENABLED = _env_bool("TTS_ENABLED", True) TTS_ENABLED = _env_bool("TTS_ENABLED", True)
TTS_MAX_TEXT_LENGTH = _env_int("TTS_MAX_TEXT_LENGTH", 500) TTS_MAX_TEXT_LENGTH = _env_int("TTS_MAX_TEXT_LENGTH", 500)
TTS_TIMEOUT_SECONDS = _env_int("TTS_TIMEOUT_SECONDS", 15)
+2 -2
View File
@@ -1,6 +1,6 @@
websockets>=13.0 websockets>=13.0
python-dotenv>=1.0.0 python-dotenv>=1.0.0
miservice_fork>=2.9.0 pywin32>=311
aiohttp>=3.9.0 aiohttp>=3.9.0
pytest>=8.0.0 pytest>=8.0.0
pytest-asyncio>=0.23.0 pytest-asyncio>=0.23.0