改用 .NET System.Speech 实现 Win11 本地 TTS 语音播放
This commit is contained in:
+3
-5
@@ -2,12 +2,10 @@
|
|||||||
WECOM_BOT_ID=your_bot_id_here
|
WECOM_BOT_ID=your_bot_id_here
|
||||||
WECOM_BOT_SECRET=your_bot_secret_here
|
WECOM_BOT_SECRET=your_bot_secret_here
|
||||||
|
|
||||||
# Xiaomi TTS
|
# Windows 11 Local TTS (.NET System.Speech via PowerShell)
|
||||||
XIAOMI_USER_ID=1136458602
|
TTS_VOICE_NAME=
|
||||||
XIAOMI_TOKEN_PATH=.mi.token
|
TTS_RATE=0
|
||||||
XIAOMI_SPEAKER_DID=3ba2c1e8-d8cb-45c5-b88a-15624e7a02f3
|
|
||||||
|
|
||||||
# TTS Behavior
|
# TTS Behavior
|
||||||
TTS_ENABLED=true
|
TTS_ENABLED=true
|
||||||
TTS_MAX_TEXT_LENGTH=500
|
TTS_MAX_TEXT_LENGTH=500
|
||||||
TTS_TIMEOUT_SECONDS=15
|
|
||||||
|
|||||||
+56
-78
@@ -1,76 +1,42 @@
|
|||||||
import asyncio
|
import logging, subprocess
|
||||||
import json
|
from typing import Tuple, Any, Dict, List
|
||||||
import logging
|
|
||||||
import threading
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Tuple, Any, Dict
|
|
||||||
|
|
||||||
from aiohttp import ClientSession
|
|
||||||
from miservice import MiAccount, MiNAService, MiTokenStore
|
|
||||||
|
|
||||||
import config
|
import config
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class SafeTokenStore(MiTokenStore):
|
def _run_ps(commands, timeout=60):
|
||||||
"""Wraps MiTokenStore to never lose passToken on auth failure."""
|
script = "; ".join(commands)
|
||||||
|
|
||||||
def __init__(self, token_path):
|
|
||||||
super().__init__(token_path)
|
|
||||||
self._saved_pass_token = ""
|
|
||||||
self._load_backup()
|
|
||||||
|
|
||||||
def _load_backup(self):
|
|
||||||
path = Path(self.token_path)
|
|
||||||
backup = Path(str(path) + ".backup")
|
|
||||||
if backup.exists():
|
|
||||||
try:
|
try:
|
||||||
data = json.loads(backup.read_text("utf-8"))
|
p = subprocess.run(
|
||||||
self._saved_pass_token = data.get("passToken", "")
|
["powershell.exe", "-NoProfile", "-NonInteractive", "-Command", script],
|
||||||
except Exception:
|
capture_output=True, text=True, timeout=timeout)
|
||||||
pass
|
return p.returncode, p.stdout.strip()
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
def _save_backup(self, token):
|
return -1, "timeout"
|
||||||
path = Path(self.token_path)
|
|
||||||
backup = Path(str(path) + ".backup")
|
|
||||||
try:
|
|
||||||
backup.write_text(json.dumps(token, ensure_ascii=False, indent=2), encoding="utf-8")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def save_token(self, token=None):
|
|
||||||
if token and token.get("passToken"):
|
|
||||||
self._saved_pass_token = token["passToken"]
|
|
||||||
self._save_backup(token)
|
|
||||||
elif token is None and self._saved_pass_token:
|
|
||||||
# miservice is trying to delete token after auth failure
|
|
||||||
# Don't let it — restore from backup
|
|
||||||
logger.warning("miservice tried to wipe token, restoring passToken...")
|
|
||||||
return
|
|
||||||
super().save_token(token)
|
|
||||||
|
|
||||||
|
|
||||||
def _run_async_in_thread(coro, timeout: float = 15.0):
|
|
||||||
result = None
|
|
||||||
error = None
|
|
||||||
|
|
||||||
def _target():
|
|
||||||
nonlocal result, error
|
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
try:
|
|
||||||
result = loop.run_until_complete(coro)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error = e
|
return -1, str(e)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
t = threading.Thread(target=_target)
|
|
||||||
t.start()
|
def list_voices() -> List[Dict[str, str]]:
|
||||||
t.join(timeout=timeout)
|
cmds = [
|
||||||
if error:
|
"Add-Type -AssemblyName System.Speech",
|
||||||
raise error
|
"$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
|
||||||
|
"foreach ($v in $s.GetInstalledVoices()) {",
|
||||||
|
" $i = $v.VoiceInfo",
|
||||||
|
' Write-Host ("VOICE:" + $i.Name + "|" + $i.Description + "|" + $i.Culture + "|" + $i.Gender + "|" + $i.Age)',
|
||||||
|
"}",
|
||||||
|
"$s.Dispose()",
|
||||||
|
]
|
||||||
|
code, out = _run_ps(cmds)
|
||||||
|
result = []
|
||||||
|
for line in out.splitlines():
|
||||||
|
if line.startswith("VOICE:"):
|
||||||
|
parts = line[6:].strip().split("|")
|
||||||
|
if len(parts) >= 5:
|
||||||
|
result.append({"name": parts[0].strip(), "description": parts[1].strip(),
|
||||||
|
"culture": parts[2].strip(), "gender": parts[3].strip(),
|
||||||
|
"age": parts[4].strip()})
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@@ -79,23 +45,35 @@ def speak(text: str) -> Tuple[bool, Dict[str, Any]]:
|
|||||||
logger.info("TTS disabled, skipping: %s", text)
|
logger.info("TTS disabled, skipping: %s", text)
|
||||||
return True, {"skipped": True}
|
return True, {"skipped": True}
|
||||||
|
|
||||||
text = text[: config.TTS_MAX_TEXT_LENGTH].strip()
|
text = text[:config.TTS_MAX_TEXT_LENGTH].strip()
|
||||||
if not text:
|
if not text:
|
||||||
return False, {"error": "empty text after truncation"}
|
return False, {"error": "empty text after truncation"}
|
||||||
|
|
||||||
async def _tts():
|
safe = text.replace(chr(34), chr(34) + chr(34))
|
||||||
token_store = SafeTokenStore(config.XIAOMI_TOKEN_PATH)
|
vname = (config.TTS_VOICE_NAME or "").replace(chr(34), chr(34) + chr(34))
|
||||||
async with ClientSession() as session:
|
|
||||||
account = MiAccount(
|
cmds = [
|
||||||
session, config.XIAOMI_USER_ID, None, token_store
|
"Add-Type -AssemblyName System.Speech",
|
||||||
)
|
"$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
|
||||||
mina = MiNAService(account)
|
]
|
||||||
return await mina.text_to_speech(config.XIAOMI_SPEAKER_DID, text)
|
if vname:
|
||||||
|
cmds += [
|
||||||
|
"foreach ($v in $s.GetInstalledVoices()) {",
|
||||||
|
' if ($v.VoiceInfo.Name -like "*' + vname + '*") { $s.SelectVoice($v.VoiceInfo.Name); break }',
|
||||||
|
"}",
|
||||||
|
]
|
||||||
|
cmds += [
|
||||||
|
"$s.Rate = " + str(config.TTS_RATE),
|
||||||
|
"$s.Volume = 100",
|
||||||
|
'$s.Speak("' + safe + '")',
|
||||||
|
"$s.Dispose()",
|
||||||
|
]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = _run_async_in_thread(_tts(), timeout=config.TTS_TIMEOUT_SECONDS)
|
code, out = _run_ps(cmds)
|
||||||
ok = isinstance(result, dict) and result.get("code") == 0
|
if code != 0:
|
||||||
return ok, result or {}
|
return False, {"error": f"TTS failed: {out}"}
|
||||||
|
return True, {"spoken": True}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("TTS call failed")
|
logger.exception("TTS failed")
|
||||||
return False, {"error": str(e)}
|
return False, {"error": str(e)}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
@@ -29,15 +29,10 @@ def _env_int(key: str, default: int) -> int:
|
|||||||
WECOM_BOT_ID = _env("WECOM_BOT_ID")
|
WECOM_BOT_ID = _env("WECOM_BOT_ID")
|
||||||
WECOM_BOT_SECRET = _env("WECOM_BOT_SECRET")
|
WECOM_BOT_SECRET = _env("WECOM_BOT_SECRET")
|
||||||
|
|
||||||
# Xiaomi TTS
|
# Windows Local TTS
|
||||||
XIAOMI_USER_ID = _env("XIAOMI_USER_ID", "1136458602")
|
TTS_VOICE_NAME = _env("TTS_VOICE_NAME", "") # empty = system default voice
|
||||||
XIAOMI_TOKEN_PATH = _env(
|
TTS_RATE = _env_int("TTS_RATE", 0) # SAPI rate: -10 (slowest) to 10 (fastest), default 0
|
||||||
"XIAOMI_TOKEN_PATH",
|
|
||||||
str(Path(__file__).resolve().parent / ".mi.token"),
|
|
||||||
)
|
|
||||||
XIAOMI_SPEAKER_DID = _env("XIAOMI_SPEAKER_DID", "3ba2c1e8-d8cb-45c5-b88a-15624e7a02f3")
|
|
||||||
|
|
||||||
# TTS
|
# TTS
|
||||||
TTS_ENABLED = _env_bool("TTS_ENABLED", True)
|
TTS_ENABLED = _env_bool("TTS_ENABLED", True)
|
||||||
TTS_MAX_TEXT_LENGTH = _env_int("TTS_MAX_TEXT_LENGTH", 500)
|
TTS_MAX_TEXT_LENGTH = _env_int("TTS_MAX_TEXT_LENGTH", 500)
|
||||||
TTS_TIMEOUT_SECONDS = _env_int("TTS_TIMEOUT_SECONDS", 15)
|
|
||||||
|
|||||||
+2
-2
@@ -1,6 +1,6 @@
|
|||||||
websockets>=13.0
|
websockets>=13.0
|
||||||
python-dotenv>=1.0.0
|
python-dotenv>=1.0.0
|
||||||
miservice_fork>=2.9.0
|
pywin32>=311
|
||||||
aiohttp>=3.9.0
|
aiohttp>=3.9.0
|
||||||
pytest>=8.0.0
|
pytest>=8.0.0
|
||||||
pytest-asyncio>=0.23.0
|
pytest-asyncio>=0.23.0
|
||||||
|
|||||||
Reference in New Issue
Block a user