From fe360fad3c8a369d06d823bbf9c7746f30ecb5e6 Mon Sep 17 00:00:00 2001
From: houhuan <houhuanya@gmail.com>
Date: Mon, 11 May 2026 13:34:03 +0800
Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E7=94=A8=20.NET=20System.Speech=20?=
 =?UTF-8?q?=E5=AE=9E=E7=8E=B0=20Win11=20=E6=9C=AC=E5=9C=B0=20TTS=20?=
 =?UTF-8?q?=E8=AF=AD=E9=9F=B3=E6=92=AD=E6=94=BE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .env.example        |   8 +--
 app/services/tts.py | 138 +++++++++++++++++++-------------------------
 config.py           |  13 ++---
 requirements.txt    |   4 +-
 4 files changed, 67 insertions(+), 96 deletions(-)

diff --git a/.env.example b/.env.example
index d4f8ad3..c6dbf99 100644
--- a/.env.example
+++ b/.env.example
@@ -2,12 +2,10 @@
 WECOM_BOT_ID=your_bot_id_here
 WECOM_BOT_SECRET=your_bot_secret_here
 
-# Xiaomi TTS
-XIAOMI_USER_ID=1136458602
-XIAOMI_TOKEN_PATH=.mi.token
-XIAOMI_SPEAKER_DID=3ba2c1e8-d8cb-45c5-b88a-15624e7a02f3
+# Windows 11 Local TTS (.NET System.Speech via PowerShell)
+TTS_VOICE_NAME=
+TTS_RATE=0
 
 # TTS Behavior
 TTS_ENABLED=true
 TTS_MAX_TEXT_LENGTH=500
-TTS_TIMEOUT_SECONDS=15
diff --git a/app/services/tts.py b/app/services/tts.py
index 72ce1e6..14566ec 100644
--- a/app/services/tts.py
+++ b/app/services/tts.py
@@ -1,76 +1,42 @@
-import asyncio
-import json
-import logging
-import threading
-from pathlib import Path
-from typing import Tuple, Any, Dict
-
-from aiohttp import ClientSession
-from miservice import MiAccount, MiNAService, MiTokenStore
-
+import logging, subprocess
+from typing import Tuple, Any, Dict, List
 import config
 
 logger = logging.getLogger(__name__)
 
 
-class SafeTokenStore(MiTokenStore):
-    """Wraps MiTokenStore to never lose passToken on auth failure."""
-
-    def __init__(self, token_path):
-        super().__init__(token_path)
-        self._saved_pass_token = ""
-        self._load_backup()
-
-    def _load_backup(self):
-        path = Path(self.token_path)
-        backup = Path(str(path) + ".backup")
-        if backup.exists():
-            try:
-                data = json.loads(backup.read_text("utf-8"))
-                self._saved_pass_token = data.get("passToken", "")
-            except Exception:
-                pass
-
-    def _save_backup(self, token):
-        path = Path(self.token_path)
-        backup = Path(str(path) + ".backup")
-        try:
-            backup.write_text(json.dumps(token, ensure_ascii=False, indent=2), encoding="utf-8")
-        except Exception:
-            pass
-
-    def save_token(self, token=None):
-        if token and token.get("passToken"):
-            self._saved_pass_token = token["passToken"]
-            self._save_backup(token)
-        elif token is None and self._saved_pass_token:
-            # miservice is trying to delete token after auth failure
-            # Don't let it — restore from backup
-            logger.warning("miservice tried to wipe token, restoring passToken...")
-            return
-        super().save_token(token)
+def _run_ps(commands, timeout=60):
+    """Run PowerShell statements joined with "; "; return (exit code, stdout or error text)."""
+    argv = ["powershell.exe", "-NoProfile", "-NonInteractive", "-Command", "; ".join(commands)]
+    try:
+        p = subprocess.run(argv, capture_output=True, text=True, timeout=timeout)
+    except subprocess.TimeoutExpired:
+        return -1, "timeout"
+    except Exception as e:  # e.g. powershell.exe missing; reported as a (-1, message) pair
+        return -1, str(e)
+    out = p.stdout.strip()
+    return p.returncode, (out if out or p.returncode == 0 else p.stderr.strip())  # failures report stderr
 
 
-def _run_async_in_thread(coro, timeout: float = 15.0):
-    result = None
-    error = None
-
-    def _target():
-        nonlocal result, error
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        try:
-            result = loop.run_until_complete(coro)
-        except Exception as e:
-            error = e
-        finally:
-            loop.close()
-
-    t = threading.Thread(target=_target)
-    t.start()
-    t.join(timeout=timeout)
-    if error:
-        raise error
+def list_voices() -> List[Dict[str, str]]:
+    """Return installed System.Speech voices as dicts: name, description, culture, gender, age."""
+    cmds = [
+        "Add-Type -AssemblyName System.Speech",
+        "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
+        "foreach ($v in $s.GetInstalledVoices()) {",
+        "  $i = $v.VoiceInfo",
+        '  Write-Host ("VOICE:" + $i.Name + "|" + $i.Description + "|" + $i.Culture + "|" + $i.Gender + "|" + $i.Age)',
+        "}",
+        "$s.Dispose()",
+    ]
+    code, out = _run_ps(cmds)
+    if code != 0:
+        # A failed query would otherwise look exactly like "no voices installed".
+        logger.warning("Listing TTS voices failed (exit code %s): %s", code, out)
+    keys = ("name", "description", "culture", "gender", "age")
+    # Only "VOICE:"-tagged lines are records; anything else PowerShell printed is ignored.
+    rows = [ln[len("VOICE:"):].split("|") for ln in out.splitlines() if ln.startswith("VOICE:")]
+    result = [dict(zip(keys, (p.strip() for p in r))) for r in rows if len(r) >= len(keys)]
     return result
 
 
@@ -79,23 +45,35 @@ def speak(text: str) -> Tuple[bool, Dict[str, Any]]:
         logger.info("TTS disabled, skipping: %s", text)
         return True, {"skipped": True}
 
-    text = text[: config.TTS_MAX_TEXT_LENGTH].strip()
+    text = text[:config.TTS_MAX_TEXT_LENGTH].strip()
     if not text:
         return False, {"error": "empty text after truncation"}
 
-    async def _tts():
-        token_store = SafeTokenStore(config.XIAOMI_TOKEN_PATH)
-        async with ClientSession() as session:
-            account = MiAccount(
-                session, config.XIAOMI_USER_ID, None, token_store
-            )
-            mina = MiNAService(account)
-            return await mina.text_to_speech(config.XIAOMI_SPEAKER_DID, text)
+    safe = text.replace(chr(34), chr(34) + chr(34))
+    vname = (config.TTS_VOICE_NAME or "").replace(chr(34), chr(34) + chr(34))
+
+    cmds = [
+        "Add-Type -AssemblyName System.Speech",
+        "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer",
+    ]
+    if vname:
+        cmds += [
+            "foreach ($v in $s.GetInstalledVoices()) {",
+            '  if ($v.VoiceInfo.Name -like "*' + vname + '*") { $s.SelectVoice($v.VoiceInfo.Name); break }',
+            "}",
+        ]
+    cmds += [
+        "$s.Rate = " + str(config.TTS_RATE),
+        "$s.Volume = 100",
+        '$s.Speak("' + safe + '")',
+        "$s.Dispose()",
+    ]
 
     try:
-        result = _run_async_in_thread(_tts(), timeout=config.TTS_TIMEOUT_SECONDS)
-        ok = isinstance(result, dict) and result.get("code") == 0
-        return ok, result or {}
+        code, out = _run_ps(cmds)
+        if code != 0:
+            return False, {"error": f"TTS failed: {out}"}
+        return True, {"spoken": True}
     except Exception as e:
-        logger.exception("TTS call failed")
-        return False, {"error": str(e)}
+        logger.exception("TTS failed")
+        return False, {"error": str(e)}
\ No newline at end of file
diff --git a/config.py b/config.py
index cac5018..488e3cd 100644
--- a/config.py
+++ b/config.py
@@ -1,4 +1,4 @@
-import os
+import os
 from pathlib import Path
 from dotenv import load_dotenv
 
@@ -29,15 +29,10 @@ def _env_int(key: str, default: int) -> int:
 WECOM_BOT_ID = _env("WECOM_BOT_ID")
 WECOM_BOT_SECRET = _env("WECOM_BOT_SECRET")
 
-# Xiaomi TTS
-XIAOMI_USER_ID = _env("XIAOMI_USER_ID", "1136458602")
-XIAOMI_TOKEN_PATH = _env(
-    "XIAOMI_TOKEN_PATH",
-    str(Path(__file__).resolve().parent / ".mi.token"),
-)
-XIAOMI_SPEAKER_DID = _env("XIAOMI_SPEAKER_DID", "3ba2c1e8-d8cb-45c5-b88a-15624e7a02f3")
+# Windows Local TTS
+TTS_VOICE_NAME = _env("TTS_VOICE_NAME", "")  # empty = system default voice
+TTS_RATE = _env_int("TTS_RATE", 0)            # SAPI rate: -10 (slowest) to 10 (fastest), default 0
 
 # TTS
 TTS_ENABLED = _env_bool("TTS_ENABLED", True)
 TTS_MAX_TEXT_LENGTH = _env_int("TTS_MAX_TEXT_LENGTH", 500)
-TTS_TIMEOUT_SECONDS = _env_int("TTS_TIMEOUT_SECONDS", 15)
diff --git a/requirements.txt b/requirements.txt
index 76bf105..807bba9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
-websockets>=13.0
+websockets>=13.0
 python-dotenv>=1.0.0
-miservice_fork>=2.9.0
+pywin32>=311
 aiohttp>=3.9.0
 pytest>=8.0.0
 pytest-asyncio>=0.23.0