改用 .NET System.Speech 实现 Win11 本地 TTS 语音播放
This commit is contained in:
+58
-80
@@ -1,76 +1,42 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Tuple, Any, Dict
|
||||
|
||||
from aiohttp import ClientSession
|
||||
from miservice import MiAccount, MiNAService, MiTokenStore
|
||||
|
||||
import logging, subprocess
|
||||
from typing import Tuple, Any, Dict, List
|
||||
import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SafeTokenStore(MiTokenStore):
    """Token store that refuses to discard a known ``passToken``.

    Every token saved with a non-empty ``passToken`` is mirrored to a
    ``<token_path>.backup`` JSON file. When ``save_token(None)`` is called
    while a ``passToken`` is remembered, the call is not forwarded to
    ``MiTokenStore``.
    """

    def __init__(self, token_path):
        """Initialise the base store and load any backed-up ``passToken``."""
        super().__init__(token_path)
        self._saved_pass_token = ""
        self._load_backup()

    def _backup_file(self):
        """Return the path of the backup file that sits next to the token file."""
        return Path(str(Path(self.token_path)) + ".backup")

    def _load_backup(self):
        """Read ``passToken`` from the backup file, if one exists.

        Failures are logged and otherwise ignored: a missing or corrupt
        backup must not prevent the store from being constructed.
        """
        backup = self._backup_file()
        if not backup.exists():
            return
        try:
            data = json.loads(backup.read_text("utf-8"))
            self._saved_pass_token = data.get("passToken", "")
        except Exception:
            # Best effort, but leave a trace so a broken backup is noticed.
            logger.warning("Could not read token backup %s", backup, exc_info=True)

    def _save_backup(self, token):
        """Write ``token`` as JSON to the backup file (best effort, logged on failure)."""
        backup = self._backup_file()
        try:
            backup.write_text(
                json.dumps(token, ensure_ascii=False, indent=2), encoding="utf-8"
            )
        except Exception:
            # Losing the backup must not break the normal save path.
            logger.warning("Could not write token backup %s", backup, exc_info=True)

    def save_token(self, token=None):
        """Persist ``token``, ignoring a wipe request while a passToken is known.

        Args:
            token: Token mapping to store, or ``None`` to request removal.
        """
        if token and token.get("passToken"):
            self._saved_pass_token = token["passToken"]
            self._save_backup(token)
        elif token is None and self._saved_pass_token:
            # A wipe was requested (per the original author's note, miservice
            # does this after an auth failure). Nothing is restored here; the
            # request is simply not forwarded to the base store.
            logger.warning("miservice tried to wipe token, restoring passToken...")
            return
        super().save_token(token)
|
||||
def _run_ps(commands, timeout=60):
    """Run PowerShell statements in a single ``powershell.exe`` process.

    Args:
        commands: Iterable of PowerShell source fragments. They are joined
            with ``"; "`` into one script passed via ``-Command``.
        timeout: Seconds to wait before giving up on the process.

    Returns:
        A ``(returncode, output)`` tuple. ``output`` is the stripped stdout.
        If the process exits non-zero with empty stdout, the stripped stderr
        is returned instead so the caller has a diagnostic to report.
        If the process cannot be run, ``returncode`` is ``-1`` and ``output``
        is ``"timeout"`` or the exception text.
    """
    script = "; ".join(commands)
    try:
        proc = subprocess.run(
            ["powershell.exe", "-NoProfile", "-NonInteractive", "-Command", script],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return -1, "timeout"
    except Exception as e:
        # Deliberately broad: callers expect a status tuple, never an exception.
        return -1, str(e)

    output = (proc.stdout or "").strip()
    if proc.returncode != 0 and not output:
        # Error text is on stderr; without this the failure reason is lost.
        output = (proc.stderr or "").strip()
    return proc.returncode, output
|
||||
|
||||
|
||||
def _run_async_in_thread(coro, timeout: float = 15.0):
    """Run a coroutine to completion on a private event loop in a helper thread.

    Args:
        coro: The coroutine object to execute.
        timeout: Seconds to wait for the helper thread to finish.

    Returns:
        Whatever the coroutine returned.

    Raises:
        TimeoutError: The helper thread was still running after ``timeout``.
        Exception: Any exception raised by the coroutine is re-raised here.
    """
    result = None
    error = None

    def _target():
        nonlocal result, error
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            result = loop.run_until_complete(coro)
        except Exception as e:
            error = e
        finally:
            loop.close()

    # Daemon thread: a coroutine that never finishes must not keep the
    # interpreter alive after the caller has given up on it.
    worker = threading.Thread(target=_target, daemon=True)
    worker.start()
    worker.join(timeout=timeout)

    if worker.is_alive():
        # join() returns silently on timeout; without this check a timeout
        # would look the same as a coroutine that returned None.
        raise TimeoutError(f"coroutine did not finish within {timeout} seconds")
    if error is not None:
        raise error
    return result
|
||||
def list_voices() -> List[Dict[str, str]]:
    """List the voices reported by .NET ``System.Speech`` via PowerShell.

    The PowerShell script prints one ``VOICE:name|description|culture|gender|age``
    line per installed voice; any other output lines are ignored.

    Returns:
        One dict per parsed voice with the keys ``name``, ``description``,
        ``culture``, ``gender`` and ``age``. The list is empty when nothing
        could be parsed; a non-zero PowerShell exit code is logged.
    """
    cmds = [
        "Add-Type -AssemblyName System.Speech",
        "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
        "foreach ($v in $s.GetInstalledVoices()) {",
        "  $i = $v.VoiceInfo",
        '  Write-Host ("VOICE:" + $i.Name + "|" + $i.Description + "|" + $i.Culture + "|" + $i.Gender + "|" + $i.Age)',
        "}",
        "$s.Dispose()",
    ]
    code, out = _run_ps(cmds)
    if code != 0:
        # Otherwise a failed call looks the same as "no voices installed".
        logger.warning("Listing TTS voices failed (exit %s): %s", code, out)

    fields = ("name", "description", "culture", "gender", "age")
    voices = []
    for line in out.splitlines():
        if not line.startswith("VOICE:"):
            continue
        parts = [part.strip() for part in line[6:].split("|")]
        if len(parts) >= len(fields):
            # zip() stops at the five known fields; extra columns are dropped.
            voices.append(dict(zip(fields, parts)))
    return voices
|
||||
|
||||
|
||||
@@ -79,23 +45,35 @@ def speak(text: str) -> Tuple[bool, Dict[str, Any]]:
|
||||
logger.info("TTS disabled, skipping: %s", text)
|
||||
return True, {"skipped": True}
|
||||
|
||||
text = text[: config.TTS_MAX_TEXT_LENGTH].strip()
|
||||
text = text[:config.TTS_MAX_TEXT_LENGTH].strip()
|
||||
if not text:
|
||||
return False, {"error": "empty text after truncation"}
|
||||
|
||||
async def _tts():
|
||||
token_store = SafeTokenStore(config.XIAOMI_TOKEN_PATH)
|
||||
async with ClientSession() as session:
|
||||
account = MiAccount(
|
||||
session, config.XIAOMI_USER_ID, None, token_store
|
||||
)
|
||||
mina = MiNAService(account)
|
||||
return await mina.text_to_speech(config.XIAOMI_SPEAKER_DID, text)
|
||||
safe = text.replace(chr(34), chr(34) + chr(34))
|
||||
vname = (config.TTS_VOICE_NAME or "").replace(chr(34), chr(34) + chr(34))
|
||||
|
||||
cmds = [
|
||||
"Add-Type -AssemblyName System.Speech",
|
||||
"$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
|
||||
]
|
||||
if vname:
|
||||
cmds += [
|
||||
"foreach ($v in $s.GetInstalledVoices()) {",
|
||||
' if ($v.VoiceInfo.Name -like "*' + vname + '*") { $s.SelectVoice($v.VoiceInfo.Name); break }',
|
||||
"}",
|
||||
]
|
||||
cmds += [
|
||||
"$s.Rate = " + str(config.TTS_RATE),
|
||||
"$s.Volume = 100",
|
||||
'$s.Speak("' + safe + '")',
|
||||
"$s.Dispose()",
|
||||
]
|
||||
|
||||
try:
|
||||
result = _run_async_in_thread(_tts(), timeout=config.TTS_TIMEOUT_SECONDS)
|
||||
ok = isinstance(result, dict) and result.get("code") == 0
|
||||
return ok, result or {}
|
||||
code, out = _run_ps(cmds)
|
||||
if code != 0:
|
||||
return False, {"error": f"TTS failed: {out}"}
|
||||
return True, {"spoken": True}
|
||||
except Exception as e:
|
||||
logger.exception("TTS call failed")
|
||||
return False, {"error": str(e)}
|
||||
logger.exception("TTS failed")
|
||||
return False, {"error": str(e)}
|
||||
Reference in New Issue
Block a user