改用 .NET System.Speech 实现 Win11 本地 TTS 语音播放
This commit is contained in:
+3
-5
@@ -2,12 +2,10 @@
|
||||
WECOM_BOT_ID=your_bot_id_here
|
||||
WECOM_BOT_SECRET=your_bot_secret_here
|
||||
|
||||
# Xiaomi TTS
|
||||
XIAOMI_USER_ID=1136458602
|
||||
XIAOMI_TOKEN_PATH=.mi.token
|
||||
XIAOMI_SPEAKER_DID=3ba2c1e8-d8cb-45c5-b88a-15624e7a02f3
|
||||
# Windows 11 Local TTS (.NET System.Speech via PowerShell)
|
||||
TTS_VOICE_NAME=
|
||||
TTS_RATE=0
|
||||
|
||||
# TTS Behavior
|
||||
TTS_ENABLED=true
|
||||
TTS_MAX_TEXT_LENGTH=500
|
||||
TTS_TIMEOUT_SECONDS=15
|
||||
|
||||
+56
-78
@@ -1,76 +1,42 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Tuple, Any, Dict
|
||||
|
||||
from aiohttp import ClientSession
|
||||
from miservice import MiAccount, MiNAService, MiTokenStore
|
||||
|
||||
import logging, subprocess
|
||||
from typing import Tuple, Any, Dict, List
|
||||
import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SafeTokenStore(MiTokenStore):
|
||||
"""Wraps MiTokenStore to never lose passToken on auth failure."""
|
||||
|
||||
def __init__(self, token_path):
|
||||
super().__init__(token_path)
|
||||
self._saved_pass_token = ""
|
||||
self._load_backup()
|
||||
|
||||
def _load_backup(self):
|
||||
path = Path(self.token_path)
|
||||
backup = Path(str(path) + ".backup")
|
||||
if backup.exists():
|
||||
def _run_ps(commands, timeout=60):
|
||||
script = "; ".join(commands)
|
||||
try:
|
||||
data = json.loads(backup.read_text("utf-8"))
|
||||
self._saved_pass_token = data.get("passToken", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _save_backup(self, token):
|
||||
path = Path(self.token_path)
|
||||
backup = Path(str(path) + ".backup")
|
||||
try:
|
||||
backup.write_text(json.dumps(token, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def save_token(self, token=None):
|
||||
if token and token.get("passToken"):
|
||||
self._saved_pass_token = token["passToken"]
|
||||
self._save_backup(token)
|
||||
elif token is None and self._saved_pass_token:
|
||||
# miservice is trying to delete token after auth failure
|
||||
# Don't let it — restore from backup
|
||||
logger.warning("miservice tried to wipe token, restoring passToken...")
|
||||
return
|
||||
super().save_token(token)
|
||||
|
||||
|
||||
def _run_async_in_thread(coro, timeout: float = 15.0):
|
||||
result = None
|
||||
error = None
|
||||
|
||||
def _target():
|
||||
nonlocal result, error
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
result = loop.run_until_complete(coro)
|
||||
p = subprocess.run(
|
||||
["powershell.exe", "-NoProfile", "-NonInteractive", "-Command", script],
|
||||
capture_output=True, text=True, timeout=timeout)
|
||||
return p.returncode, p.stdout.strip()
|
||||
except subprocess.TimeoutExpired:
|
||||
return -1, "timeout"
|
||||
except Exception as e:
|
||||
error = e
|
||||
finally:
|
||||
loop.close()
|
||||
return -1, str(e)
|
||||
|
||||
t = threading.Thread(target=_target)
|
||||
t.start()
|
||||
t.join(timeout=timeout)
|
||||
if error:
|
||||
raise error
|
||||
|
||||
def list_voices() -> List[Dict[str, str]]:
    """Return the speech voices reported by .NET System.Speech.

    Runs a PowerShell script through ``_run_ps`` that prints one line per
    installed voice in the form
    ``VOICE:<name>|<description>|<culture>|<gender>|<age>`` and parses those
    lines out of the captured output.

    Returns:
        A list of dicts with the keys ``name``, ``description``, ``culture``,
        ``gender`` and ``age``. Lines without the ``VOICE:`` prefix or with
        fewer than five fields are ignored. The list is empty when nothing
        could be parsed.
    """
    cmds = [
        "Add-Type -AssemblyName System.Speech",
        "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
        "foreach ($v in $s.GetInstalledVoices()) {",
        "  $i = $v.VoiceInfo",
        '  Write-Host ("VOICE:" + $i.Name + "|" + $i.Description + "|" + $i.Culture + "|" + $i.Gender + "|" + $i.Age)',
        "}",
        "$s.Dispose()",
    ]
    code, out = _run_ps(cmds)
    if code != 0:
        # Previously the exit code was dropped, so a failed or timed-out
        # PowerShell run was indistinguishable from "no voices installed".
        # Still parse whatever was printed so partial output is not lost.
        logger.warning("list_voices: PowerShell exited with code %s: %s", code, out)

    keys = ("name", "description", "culture", "gender", "age")
    voices: List[Dict[str, str]] = []
    for line in out.splitlines():
        if not line.startswith("VOICE:"):
            continue
        parts = [part.strip() for part in line[len("VOICE:"):].split("|")]
        if len(parts) < len(keys):
            continue
        # zip() stops at the five known keys, so extra fields are dropped.
        voices.append(dict(zip(keys, parts)))
    return voices
|
||||
|
||||
|
||||
@@ -79,23 +45,35 @@ def speak(text: str) -> Tuple[bool, Dict[str, Any]]:
|
||||
logger.info("TTS disabled, skipping: %s", text)
|
||||
return True, {"skipped": True}
|
||||
|
||||
text = text[: config.TTS_MAX_TEXT_LENGTH].strip()
|
||||
text = text[:config.TTS_MAX_TEXT_LENGTH].strip()
|
||||
if not text:
|
||||
return False, {"error": "empty text after truncation"}
|
||||
|
||||
async def _tts():
|
||||
token_store = SafeTokenStore(config.XIAOMI_TOKEN_PATH)
|
||||
async with ClientSession() as session:
|
||||
account = MiAccount(
|
||||
session, config.XIAOMI_USER_ID, None, token_store
|
||||
)
|
||||
mina = MiNAService(account)
|
||||
return await mina.text_to_speech(config.XIAOMI_SPEAKER_DID, text)
|
||||
safe = text.replace(chr(34), chr(34) + chr(34))
|
||||
vname = (config.TTS_VOICE_NAME or "").replace(chr(34), chr(34) + chr(34))
|
||||
|
||||
cmds = [
|
||||
"Add-Type -AssemblyName System.Speech",
|
||||
"$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
|
||||
]
|
||||
if vname:
|
||||
cmds += [
|
||||
"foreach ($v in $s.GetInstalledVoices()) {",
|
||||
' if ($v.VoiceInfo.Name -like "*' + vname + '*") { $s.SelectVoice($v.VoiceInfo.Name); break }',
|
||||
"}",
|
||||
]
|
||||
cmds += [
|
||||
"$s.Rate = " + str(config.TTS_RATE),
|
||||
"$s.Volume = 100",
|
||||
'$s.Speak("' + safe + '")',
|
||||
"$s.Dispose()",
|
||||
]
|
||||
|
||||
try:
|
||||
result = _run_async_in_thread(_tts(), timeout=config.TTS_TIMEOUT_SECONDS)
|
||||
ok = isinstance(result, dict) and result.get("code") == 0
|
||||
return ok, result or {}
|
||||
code, out = _run_ps(cmds)
|
||||
if code != 0:
|
||||
return False, {"error": f"TTS failed: {out}"}
|
||||
return True, {"spoken": True}
|
||||
except Exception as e:
|
||||
logger.exception("TTS call failed")
|
||||
logger.exception("TTS failed")
|
||||
return False, {"error": str(e)}
|
||||
@@ -1,4 +1,4 @@
|
||||
import os
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
@@ -29,15 +29,10 @@ def _env_int(key: str, default: int) -> int:
|
||||
WECOM_BOT_ID = _env("WECOM_BOT_ID")
|
||||
WECOM_BOT_SECRET = _env("WECOM_BOT_SECRET")
|
||||
|
||||
# Xiaomi TTS
|
||||
XIAOMI_USER_ID = _env("XIAOMI_USER_ID", "1136458602")
|
||||
XIAOMI_TOKEN_PATH = _env(
|
||||
"XIAOMI_TOKEN_PATH",
|
||||
str(Path(__file__).resolve().parent / ".mi.token"),
|
||||
)
|
||||
XIAOMI_SPEAKER_DID = _env("XIAOMI_SPEAKER_DID", "3ba2c1e8-d8cb-45c5-b88a-15624e7a02f3")
|
||||
# Windows Local TTS
|
||||
TTS_VOICE_NAME = _env("TTS_VOICE_NAME", "") # empty = system default voice
|
||||
TTS_RATE = _env_int("TTS_RATE", 0) # SAPI rate: -10 (slowest) to 10 (fastest), default 0
|
||||
|
||||
# TTS
|
||||
TTS_ENABLED = _env_bool("TTS_ENABLED", True)
|
||||
TTS_MAX_TEXT_LENGTH = _env_int("TTS_MAX_TEXT_LENGTH", 500)
|
||||
TTS_TIMEOUT_SECONDS = _env_int("TTS_TIMEOUT_SECONDS", 15)
|
||||
|
||||
+2
-2
@@ -1,6 +1,6 @@
|
||||
websockets>=13.0
|
||||
websockets>=13.0
|
||||
python-dotenv>=1.0.0
|
||||
miservice_fork>=2.9.0
|
||||
pywin32>=311
|
||||
aiohttp>=3.9.0
|
||||
pytest>=8.0.0
|
||||
pytest-asyncio>=0.23.0
|
||||
|
||||
Reference in New Issue
Block a user