v4.0.0 大大大更新

This commit is contained in:
sansan
2025-12-13 13:44:35 +08:00
parent 97c05aa33c
commit c7bacdfff7
61 changed files with 12407 additions and 5889 deletions
+13
View File
@@ -0,0 +1,13 @@
# coding=utf-8
"""
TrendRadar - 热点新闻聚合与分析工具
使用方式:
python -m trendradar # 模块执行
trendradar # 安装后执行
"""
from trendradar.context import AppContext
__version__ = "4.0.0"
__all__ = ["AppContext", "__version__"]
+719
View File
@@ -0,0 +1,719 @@
# coding=utf-8
"""
TrendRadar 主程序
热点新闻聚合与分析工具
支持: python -m trendradar
"""
import os
import webbrowser
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import requests
from trendradar.context import AppContext
# 版本号直接定义,避免循环导入
VERSION = "4.0.0"
from trendradar.core import load_config
from trendradar.crawler import DataFetcher
from trendradar.storage import convert_crawl_results_to_news_data
def check_version_update(
    current_version: str, version_url: str, proxy_url: Optional[str] = None
) -> Tuple[bool, Optional[str]]:
    """Check whether a newer version is published at ``version_url``.

    Fetches the remote version string (plain text ``X.Y.Z``) and compares
    it component-wise against ``current_version``.

    Args:
        current_version: Local version string, e.g. ``"4.0.0"``.
        version_url: URL returning the latest version as plain text.
        proxy_url: Optional proxy applied to both http and https.

    Returns:
        Tuple[bool, Optional[str]]: ``(need_update, remote_version)``;
        ``remote_version`` is ``None`` when no update is needed.
        Any failure (network, HTTP status, parsing) yields ``(False, None)``
        so version checking can never break the main flow.
    """

    def parse_version(version_str: str) -> Tuple[int, int, int]:
        # Malformed input degrades to (0, 0, 0) so a garbled remote payload
        # can never be reported as "newer" by accident.  Only ValueError is
        # possible here (from int() or the explicit raise); the original
        # bare `except:` would also have swallowed SystemExit/KeyboardInterrupt.
        try:
            parts = version_str.strip().split(".")
            if len(parts) != 3:
                raise ValueError("版本号格式不正确")
            return int(parts[0]), int(parts[1]), int(parts[2])
        except ValueError:
            return 0, 0, 0

    try:
        proxies = None
        if proxy_url:
            proxies = {"http": proxy_url, "https": proxy_url}
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Accept": "text/plain, */*",
            "Cache-Control": "no-cache",
        }
        response = requests.get(
            version_url, proxies=proxies, headers=headers, timeout=10
        )
        response.raise_for_status()
        remote_version = response.text.strip()
        print(f"当前版本: {current_version}, 远程版本: {remote_version}")
        # Tuple comparison gives correct lexicographic semver ordering.
        current_tuple = parse_version(current_version)
        remote_tuple = parse_version(remote_version)
        need_update = current_tuple < remote_tuple
        return need_update, remote_version if need_update else None
    except Exception as e:
        # Best-effort check: log and report "no update" on any failure.
        print(f"版本检查失败: {e}")
        return False, None
# === 主分析器 ===
class NewsAnalyzer:
    """News analyzer: orchestrates one full run — crawl the configured
    platforms, run keyword/frequency analysis, generate HTML reports and
    dispatch notifications according to the configured report mode."""

    # Strategy table keyed by REPORT_MODE.  Each entry drives which reports
    # are produced and pushed.  The Chinese string values are user-facing
    # labels and message fragments and must stay exactly as-is.
    MODE_STRATEGIES = {
        "incremental": {
            "mode_name": "增量模式",
            "description": "增量模式(只关注新增新闻,无新增时不推送)",
            "realtime_report_type": "实时增量",
            "summary_report_type": "当日汇总",
            "should_send_realtime": True,
            "should_generate_summary": True,
            "summary_mode": "daily",
        },
        "current": {
            "mode_name": "当前榜单模式",
            "description": "当前榜单模式(当前榜单匹配新闻 + 新增新闻区域 + 按时推送)",
            "realtime_report_type": "实时当前榜单",
            "summary_report_type": "当前榜单汇总",
            "should_send_realtime": True,
            "should_generate_summary": True,
            "summary_mode": "current",
        },
        "daily": {
            "mode_name": "当日汇总模式",
            "description": "当日汇总模式(所有匹配新闻 + 新增新闻区域 + 按时推送)",
            "realtime_report_type": "",
            "summary_report_type": "当日汇总",
            "should_send_realtime": False,
            "should_generate_summary": True,
            "summary_mode": "daily",
        },
    }

    def __init__(self):
        """Load configuration, build the AppContext and all collaborators."""
        # Load configuration first; everything else hangs off it.
        print("正在加载配置...")
        config = load_config()
        print(f"TrendRadar v{VERSION} 配置加载完成")
        print(f"监控平台数量: {len(config['PLATFORMS'])}")
        print(f"时区: {config.get('TIMEZONE', 'Asia/Shanghai')}")
        # AppContext wraps every config-dependent operation (time, storage,
        # reporting, notification) — see trendradar.context.
        self.ctx = AppContext(config)
        self.request_interval = self.ctx.config["REQUEST_INTERVAL"]
        self.report_mode = self.ctx.config["REPORT_MODE"]
        self.rank_threshold = self.ctx.rank_threshold
        # Environment detection controls proxy usage and browser opening.
        self.is_github_actions = os.environ.get("GITHUB_ACTIONS") == "true"
        self.is_docker_container = self._detect_docker_environment()
        self.update_info = None
        self.proxy_url = None
        self._setup_proxy()
        self.data_fetcher = DataFetcher(self.proxy_url)
        # Initialize the storage manager (via AppContext).
        self._init_storage_manager()
        # Version check only runs on CI to avoid extra network calls locally.
        if self.is_github_actions:
            self._check_version_update()

    def _init_storage_manager(self) -> None:
        """Initialize the storage manager (via AppContext)."""
        # Data retention days (overridable via environment variable).
        env_retention = os.environ.get("STORAGE_RETENTION_DAYS", "").strip()
        if env_retention:
            # Environment variable overrides the config file.
            # NOTE(review): int() raises ValueError on a non-numeric value —
            # confirm this fail-fast behavior is intended.
            self.ctx.config["STORAGE"]["RETENTION_DAYS"] = int(env_retention)
        self.storage_manager = self.ctx.get_storage_manager()
        print(f"存储后端: {self.storage_manager.backend_name}")
        retention_days = self.ctx.config.get("STORAGE", {}).get("RETENTION_DAYS", 0)
        if retention_days > 0:
            print(f"数据保留天数: {retention_days}")

    def _detect_docker_environment(self) -> bool:
        """Return True when running inside a Docker container."""
        try:
            # Explicit environment marker takes precedence.
            if os.environ.get("DOCKER_CONTAINER") == "true":
                return True
            # Docker creates /.dockerenv inside containers.
            if os.path.exists("/.dockerenv"):
                return True
            return False
        except Exception:
            # Detection is best-effort; default to "not in Docker".
            return False

    def _should_open_browser(self) -> bool:
        """Only open a browser on a local machine (not CI, not Docker)."""
        return not self.is_github_actions and not self.is_docker_container

    def _setup_proxy(self) -> None:
        """Configure the proxy URL based on environment and config."""
        if not self.is_github_actions and self.ctx.config["USE_PROXY"]:
            self.proxy_url = self.ctx.config["DEFAULT_PROXY"]
            print("本地环境,使用代理")
        elif not self.is_github_actions and not self.ctx.config["USE_PROXY"]:
            print("本地环境,未启用代理")
        else:
            # GitHub Actions runners have direct network access.
            print("GitHub Actions环境,不使用代理")

    def _check_version_update(self) -> None:
        """Check for a newer release and stash the result in self.update_info."""
        try:
            need_update, remote_version = check_version_update(
                VERSION, self.ctx.config["VERSION_CHECK_URL"], self.proxy_url
            )
            if need_update and remote_version:
                # Consumed later by report generation / notification dispatch.
                self.update_info = {
                    "current_version": VERSION,
                    "remote_version": remote_version,
                }
                print(f"发现新版本: {remote_version} (当前: {VERSION})")
            else:
                print("版本检查完成,当前为最新版本")
        except Exception as e:
            # Never let a failed version check abort the run.
            print(f"版本检查出错: {e}")

    def _get_mode_strategy(self) -> Dict:
        """Return the strategy dict for the current mode (fallback: daily)."""
        return self.MODE_STRATEGIES.get(self.report_mode, self.MODE_STRATEGIES["daily"])

    def _has_notification_configured(self) -> bool:
        """Return True if at least one notification channel is configured."""
        cfg = self.ctx.config
        # Channels needing paired credentials are AND-ed before the any().
        return any(
            [
                cfg["FEISHU_WEBHOOK_URL"],
                cfg["DINGTALK_WEBHOOK_URL"],
                cfg["WEWORK_WEBHOOK_URL"],
                (cfg["TELEGRAM_BOT_TOKEN"] and cfg["TELEGRAM_CHAT_ID"]),
                (
                    cfg["EMAIL_FROM"]
                    and cfg["EMAIL_PASSWORD"]
                    and cfg["EMAIL_TO"]
                ),
                (cfg["NTFY_SERVER_URL"] and cfg["NTFY_TOPIC"]),
                cfg["BARK_URL"],
                cfg["SLACK_WEBHOOK_URL"],
            ]
        )

    def _has_valid_content(
        self, stats: List[Dict], new_titles: Optional[Dict] = None
    ) -> bool:
        """Return True when there is news content worth pushing."""
        if self.report_mode in ["incremental", "current"]:
            # In incremental/current mode any non-empty stat means a match.
            return any(stat["count"] > 0 for stat in stats)
        else:
            # In daily-summary mode, matched keyword news OR newly appeared
            # titles both count as valid content.
            has_matched_news = any(stat["count"] > 0 for stat in stats)
            has_new_news = bool(
                new_titles and any(len(titles) > 0 for titles in new_titles.values())
            )
            return has_matched_news or has_new_news

    def _load_analysis_data(
        self,
    ) -> Optional[Tuple[Dict, Dict, Dict, Dict, List, List, List]]:
        """Load and pre-process today's data, filtered to current platforms.

        Returns a 7-tuple of (all_results, id_to_name, title_info,
        new_titles, word_groups, filter_words, global_filters), or None
        when there is no data for today or loading fails.
        """
        try:
            # Filter historical data down to the currently configured platforms.
            current_platform_ids = self.ctx.platform_ids
            print(f"当前监控平台: {current_platform_ids}")
            all_results, id_to_name, title_info = self.ctx.read_today_titles(
                current_platform_ids
            )
            if not all_results:
                print("没有找到当天的数据")
                return None
            total_titles = sum(len(titles) for titles in all_results.values())
            print(f"读取到 {total_titles} 个标题(已按当前监控平台过滤)")
            new_titles = self.ctx.detect_new_titles(current_platform_ids)
            word_groups, filter_words, global_filters = self.ctx.load_frequency_words()
            return (
                all_results,
                id_to_name,
                title_info,
                new_titles,
                word_groups,
                filter_words,
                global_filters,
            )
        except Exception as e:
            print(f"数据加载失败: {e}")
            return None

    def _prepare_current_title_info(self, results: Dict, time_info: str) -> Dict:
        """Build per-title metadata from a single (current) crawl batch.

        Every title gets count=1 and first/last time equal to this batch's
        timestamp, mirroring the shape produced by historical aggregation.
        """
        title_info = {}
        for source_id, titles_data in results.items():
            title_info[source_id] = {}
            for title, title_data in titles_data.items():
                ranks = title_data.get("ranks", [])
                url = title_data.get("url", "")
                mobile_url = title_data.get("mobileUrl", "")
                title_info[source_id][title] = {
                    "first_time": time_info,
                    "last_time": time_info,
                    "count": 1,
                    "ranks": ranks,
                    "url": url,
                    "mobileUrl": mobile_url,
                }
        return title_info

    def _run_analysis_pipeline(
        self,
        data_source: Dict,
        mode: str,
        title_info: Dict,
        new_titles: Dict,
        word_groups: List[Dict],
        filter_words: List[str],
        id_to_name: Dict,
        failed_ids: Optional[List] = None,
        is_daily_summary: bool = False,
        global_filters: Optional[List[str]] = None,
    ) -> Tuple[List[Dict], Optional[str]]:
        """Unified pipeline: statistics computation, then optional HTML output.

        Returns (stats, html_file) where html_file is None when HTML output
        is disabled in the STORAGE.FORMATS config.
        """
        # Frequency statistics (delegated to AppContext).
        stats, total_titles = self.ctx.count_frequency(
            data_source,
            word_groups,
            filter_words,
            id_to_name,
            title_info,
            new_titles,
            mode=mode,
            global_filters=global_filters,
        )
        # HTML generation (only when the HTML output format is enabled).
        html_file = None
        if self.ctx.config["STORAGE"]["FORMATS"]["HTML"]:
            html_file = self.ctx.generate_html(
                stats,
                total_titles,
                failed_ids=failed_ids,
                new_titles=new_titles,
                id_to_name=id_to_name,
                mode=mode,
                is_daily_summary=is_daily_summary,
                update_info=self.update_info if self.ctx.config["SHOW_VERSION_UPDATE"] else None,
            )
        return stats, html_file

    def _send_notification_if_needed(
        self,
        stats: List[Dict],
        report_type: str,
        mode: str,
        failed_ids: Optional[List] = None,
        new_titles: Optional[Dict] = None,
        id_to_name: Optional[Dict] = None,
        html_file_path: Optional[str] = None,
    ) -> bool:
        """Send notifications when enabled, configured, and content exists.

        Also enforces the push window (time range, once-per-day) and records
        successful pushes.  Returns True only when a push was dispatched.
        """
        has_notification = self._has_notification_configured()
        cfg = self.ctx.config
        if (
            cfg["ENABLE_NOTIFICATION"]
            and has_notification
            and self._has_valid_content(stats, new_titles)
        ):
            # Push-window control: time range and once-per-day gating.
            if cfg["PUSH_WINDOW"]["ENABLED"]:
                push_manager = self.ctx.create_push_manager()
                time_range_start = cfg["PUSH_WINDOW"]["TIME_RANGE"]["START"]
                time_range_end = cfg["PUSH_WINDOW"]["TIME_RANGE"]["END"]
                if not push_manager.is_in_time_range(time_range_start, time_range_end):
                    now = self.ctx.get_time()
                    print(
                        f"推送窗口控制:当前时间 {now.strftime('%H:%M')} 不在推送时间窗口 {time_range_start}-{time_range_end} 内,跳过推送"
                    )
                    return False
                if cfg["PUSH_WINDOW"]["ONCE_PER_DAY"]:
                    if push_manager.has_pushed_today():
                        print(f"推送窗口控制:今天已推送过,跳过本次推送")
                        return False
                    else:
                        print(f"推送窗口控制:今天首次推送")
            # Build the report payload shared by all channels.
            report_data = self.ctx.prepare_report(stats, failed_ids, new_titles, id_to_name, mode)
            # Whether to attach version-update info to the push.
            update_info_to_send = self.update_info if cfg["SHOW_VERSION_UPDATE"] else None
            # Fan out to every configured channel via NotificationDispatcher.
            dispatcher = self.ctx.create_notification_dispatcher()
            results = dispatcher.dispatch_all(
                report_data=report_data,
                report_type=report_type,
                update_info=update_info_to_send,
                proxy_url=self.proxy_url,
                mode=mode,
                html_file_path=html_file_path,
            )
            if not results:
                print("未配置任何通知渠道,跳过通知发送")
                return False
            # Record the push when once-per-day is on and any channel succeeded.
            if (
                cfg["PUSH_WINDOW"]["ENABLED"]
                and cfg["PUSH_WINDOW"]["ONCE_PER_DAY"]
                and any(results.values())
            ):
                push_manager = self.ctx.create_push_manager()
                push_manager.record_push(report_type)
            return True
        elif cfg["ENABLE_NOTIFICATION"] and not has_notification:
            print("⚠️ 警告:通知功能已启用但未配置任何通知渠道,将跳过通知发送")
        elif not cfg["ENABLE_NOTIFICATION"]:
            print(f"跳过{report_type}通知:通知功能已禁用")
        elif (
            cfg["ENABLE_NOTIFICATION"]
            and has_notification
            and not self._has_valid_content(stats, new_titles)
        ):
            mode_strategy = self._get_mode_strategy()
            # Distinguish realtime vs summary skip messages by report type.
            if "实时" in report_type:
                print(
                    f"跳过实时推送通知:{mode_strategy['mode_name']}下未检测到匹配的新闻"
                )
            else:
                print(
                    f"跳过{mode_strategy['summary_report_type']}通知:未匹配到有效的新闻内容"
                )
        return False

    def _generate_summary_report(self, mode_strategy: Dict) -> Optional[str]:
        """Generate the summary report AND send its notification."""
        summary_type = (
            "当前榜单汇总" if mode_strategy["summary_mode"] == "current" else "当日汇总"
        )
        print(f"生成{summary_type}报告...")
        # Load today's aggregated data.
        analysis_data = self._load_analysis_data()
        if not analysis_data:
            return None
        all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters = (
            analysis_data
        )
        # Run the shared analysis pipeline in summary mode.
        stats, html_file = self._run_analysis_pipeline(
            all_results,
            mode_strategy["summary_mode"],
            title_info,
            new_titles,
            word_groups,
            filter_words,
            id_to_name,
            is_daily_summary=True,
            global_filters=global_filters,
        )
        if html_file:
            print(f"{summary_type}报告已生成: {html_file}")
        # Dispatch the summary notification.
        self._send_notification_if_needed(
            stats,
            mode_strategy["summary_report_type"],
            mode_strategy["summary_mode"],
            failed_ids=[],
            new_titles=new_titles,
            id_to_name=id_to_name,
            html_file_path=html_file,
        )
        return html_file

    def _generate_summary_html(self, mode: str = "daily") -> Optional[str]:
        """Generate the summary HTML only (no notification)."""
        summary_type = "当前榜单汇总" if mode == "current" else "当日汇总"
        print(f"生成{summary_type}HTML...")
        # Load today's aggregated data.
        analysis_data = self._load_analysis_data()
        if not analysis_data:
            return None
        all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters = (
            analysis_data
        )
        # Run the shared analysis pipeline; stats are discarded here.
        _, html_file = self._run_analysis_pipeline(
            all_results,
            mode,
            title_info,
            new_titles,
            word_groups,
            filter_words,
            id_to_name,
            is_daily_summary=True,
            global_filters=global_filters,
        )
        if html_file:
            print(f"{summary_type}HTML已生成: {html_file}")
        return html_file

    def _initialize_and_check_config(self) -> None:
        """Print run-time environment/config status before the crawl."""
        now = self.ctx.get_time()
        print(f"当前北京时间: {now.strftime('%Y-%m-%d %H:%M:%S')}")
        if not self.ctx.config["ENABLE_CRAWLER"]:
            print("爬虫功能已禁用(ENABLE_CRAWLER=False),程序退出")
            # NOTE(review): this return only exits this helper — run() will
            # still proceed to crawl; confirm whether the whole run should
            # abort when ENABLE_CRAWLER is False.
            return
        has_notification = self._has_notification_configured()
        if not self.ctx.config["ENABLE_NOTIFICATION"]:
            print("通知功能已禁用(ENABLE_NOTIFICATION=False),将只进行数据抓取")
        elif not has_notification:
            print("未配置任何通知渠道,将只进行数据抓取,不发送通知")
        else:
            print("通知功能已启用,将发送通知")
        mode_strategy = self._get_mode_strategy()
        print(f"报告模式: {self.report_mode}")
        print(f"运行模式: {mode_strategy['description']}")

    def _crawl_data(self) -> Tuple[Dict, Dict, List]:
        """Crawl all configured platforms and persist the results.

        Returns (results, id_to_name, failed_ids) from the fetcher.
        """
        # NOTE(review): entries are (id, name) tuples when a display name is
        # configured and bare id strings otherwise — confirm crawl_websites
        # accepts both forms.
        ids = []
        for platform in self.ctx.platforms:
            if "name" in platform:
                ids.append((platform["id"], platform["name"]))
            else:
                ids.append(platform["id"])
        print(
            f"配置的监控平台: {[p.get('name', p['id']) for p in self.ctx.platforms]}"
        )
        print(f"开始爬取数据,请求间隔 {self.request_interval} 毫秒")
        Path("output").mkdir(parents=True, exist_ok=True)
        results, id_to_name, failed_ids = self.data_fetcher.crawl_websites(
            ids, self.request_interval
        )
        # Convert to NewsData and hand off to the storage backend.
        crawl_time = self.ctx.format_time()
        crawl_date = self.ctx.format_date()
        news_data = convert_crawl_results_to_news_data(
            results, id_to_name, failed_ids, crawl_time, crawl_date
        )
        # Persist to the storage backend (SQLite).
        if self.storage_manager.save_news_data(news_data):
            print(f"数据已保存到存储后端: {self.storage_manager.backend_name}")
        # Save a TXT snapshot if the backend has it enabled.
        txt_file = self.storage_manager.save_txt_snapshot(news_data)
        if txt_file:
            print(f"TXT 快照已保存: {txt_file}")
        # Compatibility: also write the legacy TXT format (backward compat).
        if self.ctx.config["STORAGE"]["FORMATS"]["TXT"]:
            title_file = self.ctx.save_titles(results, id_to_name, failed_ids)
            print(f"标题已保存到: {title_file}")
        return results, id_to_name, failed_ids

    def _execute_mode_strategy(
        self, mode_strategy: Dict, results: Dict, id_to_name: Dict, failed_ids: List
    ) -> Optional[str]:
        """Run the mode-specific reporting/notification logic after a crawl."""
        # Detect newly appeared titles against the current platform set.
        current_platform_ids = self.ctx.platform_ids
        new_titles = self.ctx.detect_new_titles(current_platform_ids)
        time_info = self.ctx.format_time()
        if self.ctx.config["STORAGE"]["FORMATS"]["TXT"]:
            self.ctx.save_titles(results, id_to_name, failed_ids)
        word_groups, filter_words, global_filters = self.ctx.load_frequency_words()
        # In "current" mode the realtime push must be computed over the full
        # day's history so per-title statistics stay complete.
        if self.report_mode == "current":
            # Load the full history (already filtered to current platforms).
            analysis_data = self._load_analysis_data()
            if analysis_data:
                (
                    all_results,
                    historical_id_to_name,
                    historical_title_info,
                    historical_new_titles,
                    _,
                    _,
                    _,
                ) = analysis_data
                print(
                    f"current模式:使用过滤后的历史数据,包含平台:{list(all_results.keys())}"
                )
                stats, html_file = self._run_analysis_pipeline(
                    all_results,
                    self.report_mode,
                    historical_title_info,
                    historical_new_titles,
                    word_groups,
                    filter_words,
                    historical_id_to_name,
                    failed_ids=failed_ids,
                    global_filters=global_filters,
                )
                # Current crawl's names take precedence over historical ones.
                combined_id_to_name = {**historical_id_to_name, **id_to_name}
                if html_file:
                    print(f"HTML报告已生成: {html_file}")
                # Realtime push using the full-history statistics.
                summary_html = None
                if mode_strategy["should_send_realtime"]:
                    self._send_notification_if_needed(
                        stats,
                        mode_strategy["realtime_report_type"],
                        self.report_mode,
                        failed_ids=failed_ids,
                        new_titles=historical_new_titles,
                        id_to_name=combined_id_to_name,
                        html_file_path=html_file,
                    )
            else:
                # We just saved this crawl; failing to read it back means the
                # storage layer is broken — abort loudly.
                print("❌ 严重错误:无法读取刚保存的数据文件")
                raise RuntimeError("数据一致性检查失败:保存后立即读取失败")
        else:
            # incremental/daily: analyze the current crawl batch directly.
            title_info = self._prepare_current_title_info(results, time_info)
            stats, html_file = self._run_analysis_pipeline(
                results,
                self.report_mode,
                title_info,
                new_titles,
                word_groups,
                filter_words,
                id_to_name,
                failed_ids=failed_ids,
                global_filters=global_filters,
            )
            if html_file:
                print(f"HTML报告已生成: {html_file}")
            # Realtime push (if this mode sends one).
            summary_html = None
            if mode_strategy["should_send_realtime"]:
                self._send_notification_if_needed(
                    stats,
                    mode_strategy["realtime_report_type"],
                    self.report_mode,
                    failed_ids=failed_ids,
                    new_titles=new_titles,
                    id_to_name=id_to_name,
                    html_file_path=html_file,
                )
        # Summary report generation (shared by all modes).
        summary_html = None
        if mode_strategy["should_generate_summary"]:
            if mode_strategy["should_send_realtime"]:
                # Realtime push already went out: summary is HTML-only.
                summary_html = self._generate_summary_html(
                    mode_strategy["summary_mode"]
                )
            else:
                # daily mode: generate the summary AND push it.
                summary_html = self._generate_summary_report(mode_strategy)
        # Open a browser only outside containers/CI.
        if self._should_open_browser() and html_file:
            if summary_html:
                summary_url = "file://" + str(Path(summary_html).resolve())
                print(f"正在打开汇总报告: {summary_url}")
                webbrowser.open(summary_url)
            else:
                file_url = "file://" + str(Path(html_file).resolve())
                print(f"正在打开HTML报告: {file_url}")
                webbrowser.open(file_url)
        elif self.is_docker_container and html_file:
            if summary_html:
                print(f"汇总报告已生成(Docker环境): {summary_html}")
            else:
                print(f"HTML报告已生成(Docker环境): {html_file}")
        return summary_html

    def run(self) -> None:
        """Execute the full analysis flow for one invocation."""
        try:
            self._initialize_and_check_config()
            mode_strategy = self._get_mode_strategy()
            results, id_to_name, failed_ids = self._crawl_data()
            self._execute_mode_strategy(mode_strategy, results, id_to_name, failed_ids)
        except Exception as e:
            print(f"分析流程执行出错: {e}")
            raise
        finally:
            # Cleanup always runs: expired-data purge and DB connection close.
            self.ctx.cleanup()
def main():
    """CLI entry point: build the analyzer and run one full cycle.

    Missing configuration files are reported with setup hints; any other
    error is logged and re-raised so the process exits non-zero.
    """
    try:
        app = NewsAnalyzer()
        app.run()
    except FileNotFoundError as err:
        print(f"❌ 配置文件错误: {err}")
        for hint in (
            "\n请确保以下文件存在:",
            " • config/config.yaml",
            " • config/frequency_words.txt",
            "\n参考项目文档进行正确配置",
        ):
            print(hint)
    except Exception as err:
        print(f"❌ 程序运行错误: {err}")
        raise


if __name__ == "__main__":
    main()
+388
View File
@@ -0,0 +1,388 @@
# coding=utf-8
"""
应用上下文模块
提供配置上下文类,封装所有依赖配置的操作,消除全局状态和包装函数。
"""
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
from trendradar.utils.time import (
get_configured_time,
format_date_folder,
format_time_filename,
get_current_time_display,
convert_time_for_display,
)
from trendradar.core import (
load_frequency_words,
matches_word_groups,
save_titles_to_file,
read_all_today_titles,
detect_latest_new_titles,
is_first_crawl_today,
count_word_frequency,
)
from trendradar.report import (
clean_title,
prepare_report_data,
generate_html_report,
render_html_content,
)
from trendradar.notification import (
render_feishu_content,
render_dingtalk_content,
split_content_into_batches,
NotificationDispatcher,
PushRecordManager,
)
from trendradar.storage import get_storage_manager
class AppContext:
    """Application context.

    Wraps every configuration-dependent operation behind one object,
    providing a single interface and removing reliance on a global CONFIG,
    which improves testability.

    Example:
        config = load_config()
        ctx = AppContext(config)

        # time helpers
        now = ctx.get_time()
        date_folder = ctx.format_date()

        # storage
        storage = ctx.get_storage_manager()

        # report generation
        html = ctx.generate_html(stats, total_titles, ...)
    """

    def __init__(self, config: Dict[str, Any]):
        """Initialize the application context.

        Args:
            config: The complete configuration dictionary.
        """
        self.config = config
        # Lazily created singleton — see get_storage_manager().
        self._storage_manager = None

    # === Configuration accessors ===

    @property
    def timezone(self) -> str:
        """Configured timezone name (default: Asia/Shanghai)."""
        return self.config.get("TIMEZONE", "Asia/Shanghai")

    @property
    def rank_threshold(self) -> int:
        """Rank threshold used for statistics/weighting (default 50)."""
        return self.config.get("RANK_THRESHOLD", 50)

    @property
    def weight_config(self) -> Dict:
        """Weight configuration for news scoring."""
        return self.config.get("WEIGHT_CONFIG", {})

    @property
    def platforms(self) -> List[Dict]:
        """List of configured platform dicts."""
        return self.config.get("PLATFORMS", [])

    @property
    def platform_ids(self) -> List[str]:
        """IDs of all configured platforms."""
        return [p["id"] for p in self.platforms]

    # === Time helpers ===

    def get_time(self) -> datetime:
        """Current time in the configured timezone."""
        return get_configured_time(self.timezone)

    def format_date(self) -> str:
        """Date folder name (YYYY-MM-DD) in the configured timezone."""
        return format_date_folder(timezone=self.timezone)

    def format_time(self) -> str:
        """Time filename component (HH-MM) in the configured timezone."""
        return format_time_filename(self.timezone)

    def get_time_display(self) -> str:
        """Current display time (HH:MM) in the configured timezone."""
        return get_current_time_display(self.timezone)

    @staticmethod
    def convert_time_display(time_str: str) -> str:
        """Convert a HH-MM filename timestamp to HH:MM display form."""
        return convert_time_for_display(time_str)

    # === Storage operations ===

    def get_storage_manager(self):
        """Storage manager (lazily initialized singleton).

        Assembles backend/local/remote/pull settings from the STORAGE
        section of the config on first access and caches the instance.
        """
        if self._storage_manager is None:
            storage_config = self.config.get("STORAGE", {})
            remote_config = storage_config.get("REMOTE", {})
            local_config = storage_config.get("LOCAL", {})
            pull_config = storage_config.get("PULL", {})
            self._storage_manager = get_storage_manager(
                backend_type=storage_config.get("BACKEND", "auto"),
                data_dir=local_config.get("DATA_DIR", "output"),
                enable_txt=storage_config.get("FORMATS", {}).get("TXT", True),
                enable_html=storage_config.get("FORMATS", {}).get("HTML", True),
                remote_config={
                    "bucket_name": remote_config.get("BUCKET_NAME", ""),
                    "access_key_id": remote_config.get("ACCESS_KEY_ID", ""),
                    "secret_access_key": remote_config.get("SECRET_ACCESS_KEY", ""),
                    "endpoint_url": remote_config.get("ENDPOINT_URL", ""),
                    "region": remote_config.get("REGION", ""),
                },
                local_retention_days=local_config.get("RETENTION_DAYS", 0),
                remote_retention_days=remote_config.get("RETENTION_DAYS", 0),
                pull_enabled=pull_config.get("ENABLED", False),
                pull_days=pull_config.get("DAYS", 7),
                timezone=self.timezone,
            )
        return self._storage_manager

    def get_output_path(self, subfolder: str, filename: str) -> str:
        """Build (and create) output/<date>/<subfolder>/ and return the file path."""
        output_dir = Path("output") / self.format_date() / subfolder
        output_dir.mkdir(parents=True, exist_ok=True)
        return str(output_dir / filename)

    # === Data handling ===

    def save_titles(self, results: Dict, id_to_name: Dict, failed_ids: List) -> str:
        """Save crawled titles to a timestamped TXT file; returns the path."""
        output_path = self.get_output_path("txt", f"{self.format_time()}.txt")
        return save_titles_to_file(results, id_to_name, failed_ids, output_path, clean_title)

    def read_today_titles(
        self, platform_ids: Optional[List[str]] = None
    ) -> Tuple[Dict, Dict, Dict]:
        """Read all of today's titles, optionally filtered by platform IDs."""
        return read_all_today_titles(self.get_storage_manager(), platform_ids)

    def detect_new_titles(
        self, platform_ids: Optional[List[str]] = None
    ) -> Dict:
        """Detect titles that first appeared in the latest crawl batch."""
        return detect_latest_new_titles(self.get_storage_manager(), platform_ids)

    def is_first_crawl(self) -> bool:
        """Return True when this is the first crawl of the current day."""
        return is_first_crawl_today("output", self.format_date())

    # === Frequency-word handling ===

    def load_frequency_words(
        self, frequency_file: Optional[str] = None
    ) -> Tuple[List[Dict], List[str], List[str]]:
        """Load (word_groups, filter_words, global_filters) from config."""
        return load_frequency_words(frequency_file)

    def matches_word_groups(
        self,
        title: str,
        word_groups: List[Dict],
        filter_words: List[str],
        global_filters: Optional[List[str]] = None,
    ) -> bool:
        """Return True when the title matches the word-group rules."""
        return matches_word_groups(title, word_groups, filter_words, global_filters)

    # === Statistics ===

    def count_frequency(
        self,
        results: Dict,
        word_groups: List[Dict],
        filter_words: List[str],
        id_to_name: Dict,
        title_info: Optional[Dict] = None,
        new_titles: Optional[Dict] = None,
        mode: str = "daily",
        global_filters: Optional[List[str]] = None,
    ) -> Tuple[List[Dict], int]:
        """Run word-frequency statistics with all config-derived parameters injected."""
        return count_word_frequency(
            results=results,
            word_groups=word_groups,
            filter_words=filter_words,
            id_to_name=id_to_name,
            title_info=title_info,
            rank_threshold=self.rank_threshold,
            new_titles=new_titles,
            mode=mode,
            global_filters=global_filters,
            weight_config=self.weight_config,
            max_news_per_keyword=self.config.get("MAX_NEWS_PER_KEYWORD", 0),
            sort_by_position_first=self.config.get("SORT_BY_POSITION_FIRST", False),
            is_first_crawl_func=self.is_first_crawl,
            convert_time_func=self.convert_time_display,
        )

    # === Report generation ===

    def prepare_report(
        self,
        stats: List[Dict],
        failed_ids: Optional[List] = None,
        new_titles: Optional[Dict] = None,
        id_to_name: Optional[Dict] = None,
        mode: str = "daily",
    ) -> Dict:
        """Assemble the report payload consumed by renderers and dispatchers."""
        return prepare_report_data(
            stats=stats,
            failed_ids=failed_ids,
            new_titles=new_titles,
            id_to_name=id_to_name,
            mode=mode,
            rank_threshold=self.rank_threshold,
            matches_word_groups_func=self.matches_word_groups,
            load_frequency_words_func=self.load_frequency_words,
        )

    def generate_html(
        self,
        stats: List[Dict],
        total_titles: int,
        failed_ids: Optional[List] = None,
        new_titles: Optional[Dict] = None,
        id_to_name: Optional[Dict] = None,
        mode: str = "daily",
        is_daily_summary: bool = False,
        update_info: Optional[Dict] = None,
    ) -> str:
        """Generate the HTML report file and return its path."""
        return generate_html_report(
            stats=stats,
            total_titles=total_titles,
            failed_ids=failed_ids,
            new_titles=new_titles,
            id_to_name=id_to_name,
            mode=mode,
            is_daily_summary=is_daily_summary,
            update_info=update_info,
            rank_threshold=self.rank_threshold,
            output_dir="output",
            date_folder=self.format_date(),
            time_filename=self.format_time(),
            # Bound lazily so render_html can be overridden on the instance.
            render_html_func=lambda *args, **kwargs: self.render_html(*args, **kwargs),
            matches_word_groups_func=self.matches_word_groups,
            load_frequency_words_func=self.load_frequency_words,
            enable_index_copy=True,
        )

    def render_html(
        self,
        report_data: Dict,
        total_titles: int,
        is_daily_summary: bool = False,
        mode: str = "daily",
        update_info: Optional[Dict] = None,
    ) -> str:
        """Render the HTML body for a report payload."""
        return render_html_content(
            report_data=report_data,
            total_titles=total_titles,
            is_daily_summary=is_daily_summary,
            mode=mode,
            update_info=update_info,
            reverse_content_order=self.config.get("REVERSE_CONTENT_ORDER", False),
            get_time_func=self.get_time,
        )

    # === Notification content rendering ===

    def render_feishu(
        self,
        report_data: Dict,
        update_info: Optional[Dict] = None,
        mode: str = "daily",
    ) -> str:
        """Render message content for the Feishu (Lark) channel."""
        return render_feishu_content(
            report_data=report_data,
            update_info=update_info,
            mode=mode,
            separator=self.config.get("FEISHU_MESSAGE_SEPARATOR", "---"),
            reverse_content_order=self.config.get("REVERSE_CONTENT_ORDER", False),
            get_time_func=self.get_time,
        )

    def render_dingtalk(
        self,
        report_data: Dict,
        update_info: Optional[Dict] = None,
        mode: str = "daily",
    ) -> str:
        """Render message content for the DingTalk channel."""
        return render_dingtalk_content(
            report_data=report_data,
            update_info=update_info,
            mode=mode,
            reverse_content_order=self.config.get("REVERSE_CONTENT_ORDER", False),
            get_time_func=self.get_time,
        )

    def split_content(
        self,
        report_data: Dict,
        format_type: str,
        update_info: Optional[Dict] = None,
        max_bytes: Optional[int] = None,
        mode: str = "daily",
    ) -> List[str]:
        """Split message content into size-limited batches per channel."""
        return split_content_into_batches(
            report_data=report_data,
            format_type=format_type,
            update_info=update_info,
            max_bytes=max_bytes,
            mode=mode,
            # Per-channel byte budgets, overridable via config.
            batch_sizes={
                "dingtalk": self.config.get("DINGTALK_BATCH_SIZE", 20000),
                "feishu": self.config.get("FEISHU_BATCH_SIZE", 29000),
                "default": self.config.get("MESSAGE_BATCH_SIZE", 4000),
            },
            feishu_separator=self.config.get("FEISHU_MESSAGE_SEPARATOR", "---"),
            reverse_content_order=self.config.get("REVERSE_CONTENT_ORDER", False),
            get_time_func=self.get_time,
        )

    # === Notification dispatch ===

    def create_notification_dispatcher(self) -> NotificationDispatcher:
        """Create a dispatcher that fans out to all configured channels."""
        return NotificationDispatcher(
            config=self.config,
            get_time_func=self.get_time,
            split_content_func=self.split_content,
        )

    def create_push_manager(self) -> PushRecordManager:
        """Create the push-record manager used for push-window gating."""
        return PushRecordManager(
            storage_backend=self.get_storage_manager(),
            get_time_func=self.get_time,
        )

    # === Resource cleanup ===

    def cleanup(self):
        """Purge expired data and release the storage manager, if created."""
        if self._storage_manager:
            self._storage_manager.cleanup_old_data()
            self._storage_manager.cleanup()
            self._storage_manager = None
+47
View File
@@ -0,0 +1,47 @@
# coding=utf-8
"""
核心模块 - 配置管理和核心工具
"""
from trendradar.core.config import (
parse_multi_account_config,
validate_paired_configs,
limit_accounts,
get_account_at_index,
)
from trendradar.core.loader import load_config
from trendradar.core.frequency import load_frequency_words, matches_word_groups
from trendradar.core.data import (
save_titles_to_file,
read_all_today_titles_from_storage,
read_all_today_titles,
detect_latest_new_titles_from_storage,
detect_latest_new_titles,
is_first_crawl_today,
)
from trendradar.core.analyzer import (
calculate_news_weight,
format_time_display,
count_word_frequency,
)
__all__ = [
"parse_multi_account_config",
"validate_paired_configs",
"limit_accounts",
"get_account_at_index",
"load_config",
"load_frequency_words",
"matches_word_groups",
# 数据处理
"save_titles_to_file",
"read_all_today_titles_from_storage",
"read_all_today_titles",
"detect_latest_new_titles_from_storage",
"detect_latest_new_titles",
"is_first_crawl_today",
# 统计分析
"calculate_news_weight",
"format_time_display",
"count_word_frequency",
]
+469
View File
@@ -0,0 +1,469 @@
# coding=utf-8
"""
统计分析模块
提供新闻统计和分析功能:
- calculate_news_weight: 计算新闻权重
- format_time_display: 格式化时间显示
- count_word_frequency: 统计词频
"""
from typing import Dict, List, Tuple, Optional, Callable
from trendradar.core.frequency import matches_word_groups
def calculate_news_weight(
    title_data: Dict,
    rank_threshold: int,
    weight_config: Dict,
) -> float:
    """Compute a composite sorting weight for a single news item.

    Args:
        title_data: Title record containing ``ranks`` and optionally ``count``.
        rank_threshold: Rank at or below which an appearance counts as "hot".
        weight_config: {RANK_WEIGHT, FREQUENCY_WEIGHT, HOTNESS_WEIGHT} multipliers.

    Returns:
        float: Weighted score; 0.0 when the item has no recorded ranks.
    """
    ranks = title_data.get("ranks", [])
    if not ranks:
        return 0.0
    appearances = title_data.get("count", len(ranks))

    # Rank component: average of (11 - min(rank, 10)) over recorded ranks,
    # so rank 1 scores 10 and anything at rank 10+ scores 1.
    rank_component = sum(11 - min(r, 10) for r in ranks) / len(ranks)

    # Frequency component: appearance count capped at 10, scaled by 10.
    freq_component = min(appearances, 10) * 10

    # Hotness component: fraction of appearances at/above the threshold,
    # expressed as a percentage.
    hot_hits = sum(1 for r in ranks if r <= rank_threshold)
    hot_component = (hot_hits / len(ranks)) * 100

    return (
        rank_component * weight_config["RANK_WEIGHT"]
        + freq_component * weight_config["FREQUENCY_WEIGHT"]
        + hot_component * weight_config["HOTNESS_WEIGHT"]
    )
def format_time_display(
    first_time: str,
    last_time: str,
    convert_time_func: Callable[[str], str],
) -> str:
    """Format a first/last-seen time pair for display (HH-MM → HH:MM).

    Args:
        first_time: Time the title first appeared.
        last_time: Time the title last appeared.
        convert_time_func: Converter from filename form to display form.

    Returns:
        str: "" when there is no first time; a single timestamp when the
        range is degenerate; otherwise "[start ~ end]".
    """
    if not first_time:
        return ""
    start = convert_time_func(first_time)
    end = convert_time_func(last_time)
    # Collapse to one timestamp when both ends coincide or the end is missing.
    if not end or start == end:
        return start
    return f"[{start} ~ {end}]"
def count_word_frequency(
    results: Dict,
    word_groups: List[Dict],
    filter_words: List[str],
    id_to_name: Dict,
    title_info: Optional[Dict] = None,
    rank_threshold: int = 3,
    new_titles: Optional[Dict] = None,
    mode: str = "daily",
    global_filters: Optional[List[str]] = None,
    weight_config: Optional[Dict] = None,
    max_news_per_keyword: int = 0,
    sort_by_position_first: bool = False,
    is_first_crawl_func: Optional[Callable[[], bool]] = None,
    convert_time_func: Optional[Callable[[str], str]] = None,
) -> Tuple[List[Dict], int]:
    """Aggregate titles per word group, honouring filters and "new" marking.

    Supports required words, normal (frequency) words, per-group filter
    words and global filter words; titles newly seen in the latest crawl
    batch are flagged as new.

    Args:
        results: crawl results ``{source_id: {title: title_data}}``.
        word_groups: word-group configuration list.
        filter_words: per-group filter words.
        id_to_name: source-id to display-name mapping.
        title_info: historical per-title statistics (optional).
        rank_threshold: rank cutoff used for hotness/weighting.
        new_titles: titles newly seen in the latest batch (optional).
        mode: report mode — ``daily`` / ``incremental`` / ``current``.
        global_filters: global filter words (optional).
        weight_config: weight factors used for sorting.
        max_news_per_keyword: global cap on titles shown per group (0 = unlimited).
        sort_by_position_first: sort groups by config position before hit count.
        is_first_crawl_func: callable telling whether this is today's first crawl.
        convert_time_func: converter from stored to display time format.

    Returns:
        Tuple[List[Dict], int]: (per-group statistics list, total title count).
    """
    # Default weight split: 40% rank, 30% frequency, 30% hotness.
    if weight_config is None:
        weight_config = {
            "RANK_WEIGHT": 0.4,
            "FREQUENCY_WEIGHT": 0.3,
            "HOTNESS_WEIGHT": 0.3,
        }
    # Default time converter: identity (display the stored format as-is).
    if convert_time_func is None:
        convert_time_func = lambda x: x
    # Default first-crawl detector: treat this run as the day's first crawl.
    if is_first_crawl_func is None:
        is_first_crawl_func = lambda: True
    # With no configured word groups, fall back to a single synthetic
    # "show everything" group and drop the filter words.
    if not word_groups:
        print("频率词配置为空,将显示所有新闻")
        word_groups = [{"required": [], "normal": [], "group_key": "全部新闻"}]
        filter_words = []  # clear filters so every title is shown
    is_first_today = is_first_crawl_func()
    # Pick the data set to scan and decide how "new" flags are assigned,
    # depending on the report mode.
    if mode == "incremental":
        if is_first_today:
            # Incremental + first crawl of the day: process everything,
            # everything counts as new.
            results_to_process = results
            all_news_are_new = True
        else:
            # Incremental + later crawl: only the newly-appeared titles.
            results_to_process = new_titles if new_titles else {}
            all_news_are_new = True
    elif mode == "current":
        # Current-list mode: only titles present in the latest time batch,
        # but their statistics come from the full day's history.
        if title_info:
            # Find the most recent last_time across all recorded titles.
            # NOTE(review): relies on lexicographic comparison of time
            # strings — assumes a sortable fixed-width format like HH-MM.
            latest_time = None
            for source_titles in title_info.values():
                for title_data in source_titles.values():
                    last_time = title_data.get("last_time", "")
                    if last_time:
                        if latest_time is None or last_time > latest_time:
                            latest_time = last_time
            # Keep only titles whose last_time equals the latest batch time.
            if latest_time:
                results_to_process = {}
                for source_id, source_titles in results.items():
                    if source_id in title_info:
                        filtered_titles = {}
                        for title, title_data in source_titles.items():
                            if title in title_info[source_id]:
                                info = title_info[source_id][title]
                                if info.get("last_time") == latest_time:
                                    filtered_titles[title] = title_data
                        if filtered_titles:
                            results_to_process[source_id] = filtered_titles
                print(
                    f"当前榜单模式:最新时间 {latest_time},筛选出 {sum(len(titles) for titles in results_to_process.values())} 条当前榜单新闻"
                )
            else:
                results_to_process = results
        else:
            results_to_process = results
        all_news_are_new = False
    else:
        # Daily summary mode: process every title.
        results_to_process = results
        all_news_are_new = False
        total_input_news = sum(len(titles) for titles in results.values())
        filter_status = (
            "全部显示"
            if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
            else "频率词过滤"
        )
        print(f"当日汇总模式:处理 {total_input_news} 条新闻,模式:{filter_status}")
    word_stats = {}
    total_titles = 0
    # Per-source set of titles already attributed to a group (each title is
    # counted for at most one group).
    processed_titles = {}
    matched_new_count = 0
    if title_info is None:
        title_info = {}
    if new_titles is None:
        new_titles = {}
    # Initialise one bucket per word group.
    for group in word_groups:
        group_key = group["group_key"]
        word_stats[group_key] = {"count": 0, "titles": {}}
    for source_id, titles_data in results_to_process.items():
        total_titles += len(titles_data)
        if source_id not in processed_titles:
            processed_titles[source_id] = {}
        for title, title_data in titles_data.items():
            if title in processed_titles.get(source_id, {}):
                continue
            # Shared matching logic (required/normal/filter/global words).
            matches_frequency_words = matches_word_groups(
                title, word_groups, filter_words, global_filters
            )
            if not matches_frequency_words:
                continue
            # Count matched new titles for incremental / first current crawl.
            if (mode == "incremental" and all_news_are_new) or (
                mode == "current" and is_first_today
            ):
                matched_new_count += 1
            source_ranks = title_data.get("ranks", [])
            source_url = title_data.get("url", "")
            source_mobile_url = title_data.get("mobileUrl", "")
            # Defensive conversion in case a non-string title slipped through.
            title_lower = str(title).lower() if not isinstance(title, str) else title.lower()
            # Attribute the title to the first word group it matches.
            for group in word_groups:
                required_words = group["required"]
                normal_words = group["normal"]
                # "Show everything" mode: all titles go to the single group.
                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻":
                    group_key = group["group_key"]
                    word_stats[group_key]["count"] += 1
                    if source_id not in word_stats[group_key]["titles"]:
                        word_stats[group_key]["titles"][source_id] = []
                else:
                    # Regular matching: every required word must appear, and
                    # at least one normal word (when any are configured).
                    if required_words:
                        all_required_present = all(
                            req_word.lower() in title_lower
                            for req_word in required_words
                        )
                        if not all_required_present:
                            continue
                    if normal_words:
                        any_normal_present = any(
                            normal_word.lower() in title_lower
                            for normal_word in normal_words
                        )
                        if not any_normal_present:
                            continue
                    group_key = group["group_key"]
                    word_stats[group_key]["count"] += 1
                    if source_id not in word_stats[group_key]["titles"]:
                        word_stats[group_key]["titles"][source_id] = []
                # Defaults for a title with no recorded history.
                first_time = ""
                last_time = ""
                count_info = 1
                ranks = source_ranks if source_ranks else []
                url = source_url
                mobile_url = source_mobile_url
                # In current mode, pull the full-day statistics for the title.
                if (
                    mode == "current"
                    and title_info
                    and source_id in title_info
                    and title in title_info[source_id]
                ):
                    info = title_info[source_id][title]
                    first_time = info.get("first_time", "")
                    last_time = info.get("last_time", "")
                    count_info = info.get("count", 1)
                    if "ranks" in info and info["ranks"]:
                        ranks = info["ranks"]
                    url = info.get("url", source_url)
                    mobile_url = info.get("mobileUrl", source_mobile_url)
                elif (
                    title_info
                    and source_id in title_info
                    and title in title_info[source_id]
                ):
                    # Other modes: enrich from history when available.
                    info = title_info[source_id][title]
                    first_time = info.get("first_time", "")
                    last_time = info.get("last_time", "")
                    count_info = info.get("count", 1)
                    if "ranks" in info and info["ranks"]:
                        ranks = info["ranks"]
                    url = info.get("url", source_url)
                    mobile_url = info.get("mobileUrl", source_mobile_url)
                # Sentinel rank when nothing was recorded.
                if not ranks:
                    ranks = [99]
                time_display = format_time_display(first_time, last_time, convert_time_func)
                source_name = id_to_name.get(source_id, source_id)
                # Decide whether the title is flagged as new.
                is_new = False
                if all_news_are_new:
                    # Incremental mode / day's first crawl: everything is new.
                    is_new = True
                elif new_titles and source_id in new_titles:
                    # Otherwise check membership in the new-title set.
                    new_titles_for_source = new_titles[source_id]
                    is_new = title in new_titles_for_source
                word_stats[group_key]["titles"][source_id].append(
                    {
                        "title": title,
                        "source_name": source_name,
                        "first_time": first_time,
                        "last_time": last_time,
                        "time_display": time_display,
                        "count": count_info,
                        "ranks": ranks,
                        "rank_threshold": rank_threshold,
                        "url": url,
                        "mobileUrl": mobile_url,
                        "is_new": is_new,
                    }
                )
                if source_id not in processed_titles:
                    processed_titles[source_id] = {}
                processed_titles[source_id][title] = True
                # Stop after the first matching group — one group per title.
                break
    # Final mode-specific summary logging.
    if mode == "incremental":
        if is_first_today:
            total_input_news = sum(len(titles) for titles in results.values())
            filter_status = (
                "全部显示"
                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
                else "频率词匹配"
            )
            print(
                f"增量模式:当天第一次爬取,{total_input_news} 条新闻中有 {matched_new_count} 条{filter_status}"
            )
        else:
            if new_titles:
                total_new_count = sum(len(titles) for titles in new_titles.values())
                filter_status = (
                    "全部显示"
                    if len(word_groups) == 1
                    and word_groups[0]["group_key"] == "全部新闻"
                    else "匹配频率词"
                )
                print(
                    f"增量模式:{total_new_count} 条新增新闻中,有 {matched_new_count} 条{filter_status}"
                )
                if matched_new_count == 0 and len(word_groups) > 1:
                    print("增量模式:没有新增新闻匹配频率词,将不会发送通知")
            else:
                print("增量模式:未检测到新增新闻")
    elif mode == "current":
        total_input_news = sum(len(titles) for titles in results_to_process.values())
        if is_first_today:
            filter_status = (
                "全部显示"
                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
                else "频率词匹配"
            )
            print(
                f"当前榜单模式:当天第一次爬取,{total_input_news} 条当前榜单新闻中有 {matched_new_count} 条{filter_status}"
            )
        else:
            matched_count = sum(stat["count"] for stat in word_stats.values())
            filter_status = (
                "全部显示"
                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
                else "频率词匹配"
            )
            print(
                f"当前榜单模式:{total_input_news} 条当前榜单新闻中有 {matched_count} 条{filter_status}"
            )
    stats = []
    # Map each group key to its configured position and per-group cap.
    group_key_to_position = {
        group["group_key"]: idx for idx, group in enumerate(word_groups)
    }
    group_key_to_max_count = {
        group["group_key"]: group.get("max_count", 0) for group in word_groups
    }
    for group_key, data in word_stats.items():
        all_titles = []
        for source_id, title_list in data["titles"].items():
            all_titles.extend(title_list)
        # Sort titles by descending weight, then best rank, then count.
        sorted_titles = sorted(
            all_titles,
            key=lambda x: (
                -calculate_news_weight(x, rank_threshold, weight_config),
                min(x["ranks"]) if x["ranks"] else 999,
                -x["count"],
            ),
        )
        # Display cap: the per-group value wins over the global one.
        group_max_count = group_key_to_max_count.get(group_key, 0)
        if group_max_count == 0:
            # Fall back to the global limit (0 means unlimited).
            group_max_count = max_news_per_keyword
        if group_max_count > 0:
            sorted_titles = sorted_titles[:group_max_count]
        stats.append(
            {
                "word": group_key,
                "count": data["count"],
                "position": group_key_to_position.get(group_key, 999),
                "titles": sorted_titles,
                "percentage": (
                    round(data["count"] / total_titles * 100, 2)
                    if total_titles > 0
                    else 0
                ),
            }
        )
    # Group ordering: config position first, or hit count first (default).
    if sort_by_position_first:
        stats.sort(key=lambda x: (x["position"], -x["count"]))
    else:
        stats.sort(key=lambda x: (-x["count"], x["position"]))
    # Log the post-filter match count (matches what the push will show).
    matched_news_count = sum(len(stat["titles"]) for stat in stats if stat["count"] > 0)
    if mode == "daily":
        print(f"频率词过滤后:{matched_news_count} 条新闻匹配(将显示在推送中)")
    return stats, total_titles
+152
View File
@@ -0,0 +1,152 @@
# coding=utf-8
"""
配置工具模块 - 多账号配置解析和验证
提供多账号推送配置的解析、验证和限制功能
"""
from typing import Dict, List, Optional, Tuple
def parse_multi_account_config(config_value: str, separator: str = ";") -> List[str]:
    """Split a multi-account configuration string into a list of accounts.

    Empty segments are kept as placeholders (``";token2"`` means the first
    account has no value), but a string consisting solely of empty segments
    yields an empty list.

    Args:
        config_value: raw configuration string, accounts joined by *separator*.
        separator: delimiter between accounts (default ``;``).

    Returns:
        List of trimmed account values, or ``[]`` when nothing is configured.

    Examples:
        >>> parse_multi_account_config("url1;url2;url3")
        ['url1', 'url2', 'url3']
        >>> parse_multi_account_config(";token2")
        ['', 'token2']
        >>> parse_multi_account_config("")
        []
    """
    if not config_value:
        return []

    parts = [piece.strip() for piece in config_value.split(separator)]

    # A string of nothing but separators / whitespace counts as unconfigured.
    return parts if any(parts) else []
def validate_paired_configs(
    configs: Dict[str, List[str]],
    channel_name: str,
    required_keys: Optional[List[str]] = None
) -> Tuple[bool, int]:
    """Check that paired multi-account configs all have the same length.

    Channels that need several config items per account (e.g. Telegram's
    token and chat_id) must supply the same number of entries for each item.

    Args:
        configs: mapping of config name -> account list.
        channel_name: channel name used in log output.
        required_keys: config names that must be non-empty for the channel
            to count as configured at all.

    Returns:
        (validation passed, number of accounts). A missing required key
        yields ``(True, 0)`` — the channel is treated as unconfigured, not
        as invalid.

    Examples:
        >>> validate_paired_configs({
        ...     "token": ["t1", "t2"],
        ...     "chat_id": ["c1", "c2"]
        ... }, "Telegram", ["token", "chat_id"])
        (True, 2)
        >>> validate_paired_configs({
        ...     "token": ["t1", "t2"],
        ...     "chat_id": ["c1"]
        ... }, "Telegram", ["token", "chat_id"])
        (False, 0)
    """
    # Only lists that actually contain something take part in validation.
    populated = {name: values for name, values in configs.items() if values}
    if not populated:
        return True, 0

    # An absent/empty required key means the channel is simply unconfigured.
    if required_keys:
        for key in required_keys:
            if not populated.get(key):
                return True, 0

    lengths = {name: len(values) for name, values in populated.items()}
    distinct = set(lengths.values())

    if len(distinct) > 1:
        print(f"{channel_name} 配置错误:配对配置数量不一致,将跳过该渠道推送")
        for key, length in lengths.items():
            print(f" - {key}: {length} 个")
        return False, 0

    return True, next(iter(distinct)) if distinct else 0
def limit_accounts(
    accounts: List[str],
    max_count: int,
    channel_name: str
) -> List[str]:
    """Cap the number of accounts used for one channel.

    When more accounts are configured than *max_count* allows, only the
    first *max_count* are kept and a warning is printed.

    Args:
        accounts: configured account list.
        max_count: maximum number of accounts allowed.
        channel_name: channel name used in log output.

    Returns:
        The (possibly truncated) account list.
    """
    total = len(accounts)
    if total <= max_count:
        return accounts

    print(f"⚠️ {channel_name} 配置了 {total} 个账号,超过最大限制 {max_count},只使用前 {max_count} 个")
    print(f" ⚠️ 警告:如果您是 fork 用户,过多账号可能导致 GitHub Actions 运行时间过长,存在账号风险")
    return accounts[:max_count]
def get_account_at_index(accounts: List[str], index: int, default: str = "") -> str:
    """Safely fetch the account value at *index*.

    Returns *default* when the index is out of range — including negative
    indices, which would otherwise silently wrap around via Python's
    negative indexing — or when the stored value is empty.

    Args:
        accounts: account list.
        index: positional account slot (0-based, non-negative).
        default: value returned for missing/empty slots.

    Returns:
        The account value, or *default*.

    Examples:
        >>> get_account_at_index(["a", "b", "c"], 1)
        'b'
        >>> get_account_at_index(["a", "", "c"], 1, "default")
        'default'
        >>> get_account_at_index(["a"], 5, "default")
        'default'
    """
    # Reject negative indices explicitly: accounts[-1] would wrap to the
    # last account, which is never the intent for positional slots.
    if 0 <= index < len(accounts):
        return accounts[index] if accounts[index] else default
    return default
+291
View File
@@ -0,0 +1,291 @@
# coding=utf-8
"""
数据处理模块
提供数据读取、保存和检测功能:
- save_titles_to_file: 保存标题到 TXT 文件
- read_all_today_titles: 从存储后端读取当天所有标题
- detect_latest_new_titles: 检测最新批次的新增标题
Author: TrendRadar Team
"""
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Callable
def save_titles_to_file(
    results: Dict,
    id_to_name: Dict,
    failed_ids: List,
    output_path: str,
    clean_title_func: Callable[[str], str],
) -> str:
    """Write crawl results to a plain-text snapshot file.

    Per source: a header line (``id | name`` when a distinct display name
    exists, otherwise just ``id``), the titles sorted by rank with optional
    ``[URL:...]`` / ``[MOBILE:...]`` suffixes, then a blank line. Failed
    source ids are listed at the end of the file.

    Args:
        results: crawl results ``{source_id: {title: title_data}}``.
        id_to_name: source-id to display-name mapping.
        failed_ids: ids whose fetch failed.
        output_path: destination file path (parents are created).
        clean_title_func: title normaliser applied before writing.

    Returns:
        str: the path the snapshot was written to.
    """
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as fh:
        for source_id, titles in results.items():
            # Source header.
            display_name = id_to_name.get(source_id)
            if display_name and display_name != source_id:
                fh.write(f"{source_id} | {display_name}\n")
            else:
                fh.write(f"{source_id}\n")

            # Collect (rank, title, url, mobile_url) tuples, then sort by rank.
            entries = []
            for raw_title, info in titles.items():
                cleaned = clean_title_func(raw_title)
                if isinstance(info, dict):
                    ranks = info.get("ranks", [])
                    url = info.get("url", "")
                    mobile_url = info.get("mobileUrl", "")
                else:
                    # Legacy format: the value is just a list of ranks.
                    ranks = info if isinstance(info, list) else []
                    url = ""
                    mobile_url = ""
                entries.append((ranks[0] if ranks else 1, cleaned, url, mobile_url))
            entries.sort(key=lambda entry: entry[0])

            for position, cleaned, url, mobile_url in entries:
                pieces = [f"{position}. {cleaned}"]
                if url:
                    pieces.append(f"[URL:{url}]")
                if mobile_url:
                    pieces.append(f"[MOBILE:{mobile_url}]")
                fh.write(" ".join(pieces) + "\n")

            fh.write("\n")

        if failed_ids:
            fh.write("==== 以下ID请求失败 ====\n")
            for source_id in failed_ids:
                fh.write(f"{source_id}\n")

    return output_path
def read_all_today_titles_from_storage(
    storage_manager,
    current_platform_ids: Optional[List[str]] = None,
) -> Tuple[Dict, Dict, Dict]:
    """Load every title recorded today from the storage backend (SQLite data).

    Args:
        storage_manager: storage manager instance.
        current_platform_ids: platform ids currently monitored; other
            platforms are skipped (``None`` disables the filter).

    Returns:
        Tuple[Dict, Dict, Dict]: (all_results, id_to_name, title_info);
        three empty dicts when there is no data or the backend fails.
    """
    try:
        news_data = storage_manager.get_today_all_data()
        if not news_data or not news_data.items:
            return {}, {}, {}

        all_results: Dict = {}
        final_id_to_name: Dict = {}
        title_info: Dict = {}

        for source_id, news_list in news_data.items.items():
            # Skip platforms that are no longer monitored.
            if current_platform_ids is not None and source_id not in current_platform_ids:
                continue

            final_id_to_name[source_id] = news_data.id_to_name.get(source_id, source_id)
            results_bucket = all_results.setdefault(source_id, {})
            info_bucket = title_info.setdefault(source_id, {})

            for item in news_list:
                # Older records may lack the aggregate fields; fall back to
                # the single-crawl values.
                ranks = getattr(item, 'ranks', [item.rank])
                first_seen = getattr(item, 'first_time', item.crawl_time)
                last_seen = getattr(item, 'last_time', item.crawl_time)
                occurrences = getattr(item, 'count', 1)
                url = item.url or ""
                mobile_url = item.mobile_url or ""

                results_bucket[item.title] = {
                    "ranks": ranks,
                    "url": url,
                    "mobileUrl": mobile_url,
                }
                info_bucket[item.title] = {
                    "first_time": first_seen,
                    "last_time": last_seen,
                    "count": occurrences,
                    "ranks": ranks,
                    "url": url,
                    "mobileUrl": mobile_url,
                }

        return all_results, final_id_to_name, title_info
    except Exception as e:
        print(f"[存储] 从存储后端读取数据失败: {e}")
        return {}, {}, {}
def read_all_today_titles(
    storage_manager,
    current_platform_ids: Optional[List[str]] = None,
) -> Tuple[Dict, Dict, Dict]:
    """Read today's titles from the storage backend, with summary logging.

    Thin wrapper around :func:`read_all_today_titles_from_storage` that
    additionally logs how many titles were loaded (or that there are none).

    Args:
        storage_manager: storage manager instance.
        current_platform_ids: platform ids currently monitored (filter).

    Returns:
        Tuple[Dict, Dict, Dict]: (all_results, id_to_name, title_info)
    """
    results, id_to_name, title_info = read_all_today_titles_from_storage(
        storage_manager, current_platform_ids
    )
    if results:
        total = sum(len(titles) for titles in results.values())
        print(f"[存储] 已从存储后端读取 {total} 条标题")
    else:
        print("[存储] 当天暂无数据")
    return results, id_to_name, title_info
def detect_latest_new_titles_from_storage(
    storage_manager,
    current_platform_ids: Optional[List[str]] = None,
) -> Dict:
    """Detect titles that first appeared in today's latest crawl batch.

    A title is "new" when it is present in the latest batch but absent from
    all earlier batches of the day. On the day's very first crawl (no
    history yet) nothing is reported as new.

    Args:
        storage_manager: storage manager instance.
        current_platform_ids: platform ids currently monitored (filter).

    Returns:
        Dict: new titles ``{source_id: {title: title_data}}``; empty on
        error or when there is nothing new.
    """
    try:
        # Latest crawl batch.
        latest_data = storage_manager.get_latest_crawl_data()
        if not latest_data or not latest_data.items:
            return {}
        # Full history for today.
        all_data = storage_manager.get_today_all_data()
        if not all_data or not all_data.items:
            # No history at all (first crawl) — nothing counts as "new".
            return {}
        # Collect historical titles: those whose first appearance predates
        # the latest batch.
        # NOTE(review): assumes item.first_time uses exactly the same string
        # format/value as latest_data.crawl_time for batch members — confirm.
        latest_time = latest_data.crawl_time
        historical_titles = {}
        for source_id, news_list in all_data.items.items():
            if current_platform_ids is not None and source_id not in current_platform_ids:
                continue
            historical_titles[source_id] = set()
            for item in news_list:
                # Only titles first seen BEFORE the latest batch are history.
                first_time = getattr(item, 'first_time', item.crawl_time)
                if first_time != latest_time:
                    historical_titles[source_id].add(item.title)
        # If every platform's history set is empty, only one crawl batch
        # exists today, so nothing should be flagged as new.
        has_historical_data = any(len(titles) > 0 for titles in historical_titles.values())
        if not has_historical_data:
            return {}
        # Anything in the latest batch that is absent from history is new.
        new_titles = {}
        for source_id, news_list in latest_data.items.items():
            if current_platform_ids is not None and source_id not in current_platform_ids:
                continue
            historical_set = historical_titles.get(source_id, set())
            source_new_titles = {}
            for item in news_list:
                if item.title not in historical_set:
                    source_new_titles[item.title] = {
                        "ranks": [item.rank],
                        "url": item.url or "",
                        "mobileUrl": item.mobile_url or "",
                    }
            if source_new_titles:
                new_titles[source_id] = source_new_titles
        return new_titles
    except Exception as e:
        print(f"[存储] 从存储后端检测新标题失败: {e}")
        return {}
def detect_latest_new_titles(
    storage_manager,
    current_platform_ids: Optional[List[str]] = None,
) -> Dict:
    """Detect today's latest-batch new titles, with summary logging.

    Thin wrapper around :func:`detect_latest_new_titles_from_storage` that
    logs the number of new titles found (silent when there are none).

    Args:
        storage_manager: storage manager instance.
        current_platform_ids: platform ids currently monitored (filter).

    Returns:
        Dict: new titles ``{source_id: {title: title_data}}``.
    """
    found = detect_latest_new_titles_from_storage(storage_manager, current_platform_ids)
    if found:
        total_new = sum(len(titles) for titles in found.values())
        print(f"[存储] 从存储后端检测到 {total_new} 条新增标题")
    return found
def is_first_crawl_today(output_dir: str, date_folder: str) -> bool:
    """Report whether the current run is today's first crawl.

    The check counts the ``.txt`` snapshots already written for the day:
    zero or one file (the one this run just wrote) means first crawl.

    Args:
        output_dir: root output directory.
        date_folder: date folder name (one per day).

    Returns:
        bool: True when this is the day's first crawl.
    """
    snapshot_dir = Path(output_dir) / date_folder / "txt"
    if not snapshot_dir.exists():
        return True
    snapshots = [entry for entry in snapshot_dir.iterdir() if entry.suffix == ".txt"]
    return len(snapshots) <= 1
+194
View File
@@ -0,0 +1,194 @@
# coding=utf-8
"""
频率词配置加载模块
负责从配置文件加载频率词规则,支持:
- 普通词组
- 必须词(+前缀)
- 过滤词(!前缀)
- 全局过滤词([GLOBAL_FILTER] 区域)
- 最大显示数量(@前缀)
"""
import os
from pathlib import Path
from typing import Dict, List, Tuple, Optional
def load_frequency_words(
    frequency_file: Optional[str] = None,
) -> Tuple[List[Dict], List[str], List[str]]:
    """Load the frequency-word configuration file.

    File format:
        - word groups are separated by blank lines
        - a ``[GLOBAL_FILTER]`` section lists global filter words
        - a ``[WORD_GROUPS]`` section lists word groups (the default)

    Group syntax:
        - plain word: any one match qualifies the title
        - ``+word``: required word — all required words must match
        - ``!word``: filter word — a match excludes the title
        - ``@number``: maximum number of titles shown for the group

    Args:
        frequency_file: path to the config file; defaults to the
            ``FREQUENCY_WORDS_PATH`` env var or ``config/frequency_words.txt``.

    Returns:
        (word group list, per-group filter words (flattened), global filter words)

    Raises:
        FileNotFoundError: the frequency-word file does not exist.
    """
    if frequency_file is None:
        frequency_file = os.environ.get(
            "FREQUENCY_WORDS_PATH", "config/frequency_words.txt"
        )
    frequency_path = Path(frequency_file)
    if not frequency_path.exists():
        raise FileNotFoundError(f"频率词文件 {frequency_file} 不存在")
    with open(frequency_path, "r", encoding="utf-8") as f:
        content = f.read()
    # Blank-line-separated blocks; each is a section marker and/or words.
    word_groups = [group.strip() for group in content.split("\n\n") if group.strip()]
    processed_groups = []
    filter_words = []
    global_filters = []
    # Default section (backwards compatible with marker-less files).
    current_section = "WORD_GROUPS"
    for group in word_groups:
        lines = [line.strip() for line in group.split("\n") if line.strip()]
        if not lines:
            continue
        # Section marker line, e.g. [GLOBAL_FILTER]; applies from here on.
        # Unrecognised markers fall through and are treated as plain words.
        if lines[0].startswith("[") and lines[0].endswith("]"):
            section_name = lines[0][1:-1].upper()
            if section_name in ("GLOBAL_FILTER", "WORD_GROUPS"):
                current_section = section_name
                lines = lines[1:]  # drop the marker line
        # Global-filter section: plain words only.
        if current_section == "GLOBAL_FILTER":
            for line in lines:
                # Special prefixes are not supported here; skip such lines.
                if line.startswith(("!", "+", "@")):
                    continue  # 全局过滤区不支持特殊语法
                if line:
                    global_filters.append(line)
            continue
        # Word-group section: classify each line by its prefix.
        words = lines
        group_required_words = []
        group_normal_words = []
        # NOTE(review): group_filter_words is collected but never used beyond
        # the flattened filter_words list returned below.
        group_filter_words = []
        group_max_count = 0  # 0 = no per-group display limit
        for word in words:
            if word.startswith("@"):
                # Per-group display cap; only positive integers are accepted.
                # (int() raises ValueError on "" so IndexError is defensive.)
                try:
                    count = int(word[1:])
                    if count > 0:
                        group_max_count = count
                except (ValueError, IndexError):
                    pass  # ignore malformed @N entries
            elif word.startswith("!"):
                filter_words.append(word[1:])
                group_filter_words.append(word[1:])
            elif word.startswith("+"):
                group_required_words.append(word[1:])
            else:
                group_normal_words.append(word)
        # A group needs at least one matchable word; its display key prefers
        # the normal words and falls back to the required words.
        if group_required_words or group_normal_words:
            if group_normal_words:
                group_key = " ".join(group_normal_words)
            else:
                group_key = " ".join(group_required_words)
            processed_groups.append(
                {
                    "required": group_required_words,
                    "normal": group_normal_words,
                    "group_key": group_key,
                    "max_count": group_max_count,
                }
            )
    return processed_groups, filter_words, global_filters
def matches_word_groups(
    title: str,
    word_groups: List[Dict],
    filter_words: List[str],
    global_filters: Optional[List[str]] = None
) -> bool:
    """Decide whether a title passes the word-group rules.

    Evaluation order: global filters first (a hit rejects immediately),
    then the per-group filter words, then the word groups themselves.
    A group matches when all of its required words and — if any are
    configured — at least one of its normal words appear in the title,
    case-insensitively. An empty ``word_groups`` list matches everything.

    Args:
        title: title text.
        word_groups: word-group list.
        filter_words: filter-word list.
        global_filters: global filter words.

    Returns:
        bool: whether the title matches.
    """
    # Defensive: tolerate non-string titles coming from upstream data.
    if not isinstance(title, str):
        title = "" if title is None else str(title)
    if not title.strip():
        return False

    haystack = title.lower()

    # Global filters veto the title outright.
    if global_filters and any(word.lower() in haystack for word in global_filters):
        return False

    # No configured groups means "show everything".
    if not word_groups:
        return True

    # Per-group filter words.
    if any(word.lower() in haystack for word in filter_words):
        return False

    # Group matching: first satisfied group wins.
    for group in word_groups:
        required = group["required"]
        normal = group["normal"]
        if required and not all(word.lower() in haystack for word in required):
            continue
        if normal and not any(word.lower() in haystack for word in normal):
            continue
        return True

    return False
+332
View File
@@ -0,0 +1,332 @@
# coding=utf-8
"""
配置加载模块
负责从 YAML 配置文件和环境变量加载配置。
"""
import os
from pathlib import Path
from typing import Dict, Any, Optional
import yaml
from .config import parse_multi_account_config, validate_paired_configs
def _get_env_bool(key: str, default: bool = False) -> Optional[bool]:
"""从环境变量获取布尔值,如果未设置返回 None"""
value = os.environ.get(key, "").strip().lower()
if not value:
return None
return value in ("true", "1")
def _get_env_int(key: str, default: int = 0) -> int:
"""从环境变量获取整数值"""
value = os.environ.get(key, "").strip()
if not value:
return default
try:
return int(value)
except ValueError:
return default
def _get_env_str(key: str, default: str = "") -> str:
"""从环境变量获取字符串值"""
return os.environ.get(key, "").strip() or default
def _load_app_config(config_data: Dict) -> Dict:
    """Build the application section; the TIMEZONE env var wins over the file."""
    section = config_data.get("app", {})
    return {
        "VERSION_CHECK_URL": section.get("version_check_url", ""),
        "SHOW_VERSION_UPDATE": section.get("show_version_update", True),
        # Environment override for the timezone.
        "TIMEZONE": _get_env_str("TIMEZONE") or section.get("timezone", "Asia/Shanghai"),
    }
def _load_crawler_config(config_data: Dict) -> Dict:
    """Build the crawler section; the ENABLE_CRAWLER env var wins over the file."""
    section = config_data.get("crawler", {})
    # None means the env var is unset — fall back to the YAML value.
    enable_override = _get_env_bool("ENABLE_CRAWLER")
    if enable_override is None:
        enable_override = section.get("enable_crawler", True)
    return {
        "REQUEST_INTERVAL": section.get("request_interval", 100),
        "USE_PROXY": section.get("use_proxy", False),
        "DEFAULT_PROXY": section.get("default_proxy", ""),
        "ENABLE_CRAWLER": enable_override,
    }
def _load_report_config(config_data: Dict) -> Dict:
    """Build the report section; several env vars override the file values."""
    section = config_data.get("report", {})
    # Boolean overrides return None when unset; int override treats 0 as unset.
    sort_override = _get_env_bool("SORT_BY_POSITION_FIRST")
    reverse_override = _get_env_bool("REVERSE_CONTENT_ORDER")
    max_news_override = _get_env_int("MAX_NEWS_PER_KEYWORD")
    return {
        "REPORT_MODE": _get_env_str("REPORT_MODE") or section.get("mode", "daily"),
        "RANK_THRESHOLD": section.get("rank_threshold", 10),
        "SORT_BY_POSITION_FIRST": (
            section.get("sort_by_position_first", False)
            if sort_override is None
            else sort_override
        ),
        "MAX_NEWS_PER_KEYWORD": max_news_override or section.get("max_news_per_keyword", 0),
        "REVERSE_CONTENT_ORDER": (
            section.get("reverse_content_order", False)
            if reverse_override is None
            else reverse_override
        ),
    }
def _load_notification_config(config_data: Dict) -> Dict:
    """Build the notification section; ENABLE_NOTIFICATION env var wins."""
    section = config_data.get("notification", {})
    enable_override = _get_env_bool("ENABLE_NOTIFICATION")
    if enable_override is None:
        enable_override = section.get("enable_notification", True)
    return {
        "ENABLE_NOTIFICATION": enable_override,
        # Per-channel message batch sizes (characters per message).
        "MESSAGE_BATCH_SIZE": section.get("message_batch_size", 4000),
        "DINGTALK_BATCH_SIZE": section.get("dingtalk_batch_size", 20000),
        "FEISHU_BATCH_SIZE": section.get("feishu_batch_size", 29000),
        "BARK_BATCH_SIZE": section.get("bark_batch_size", 3600),
        "SLACK_BATCH_SIZE": section.get("slack_batch_size", 4000),
        "BATCH_SEND_INTERVAL": section.get("batch_send_interval", 1.0),
        "FEISHU_MESSAGE_SEPARATOR": section.get("feishu_message_separator", "---"),
        "MAX_ACCOUNTS_PER_CHANNEL": _get_env_int("MAX_ACCOUNTS_PER_CHANNEL")
        or section.get("max_accounts_per_channel", 3),
    }
def _load_push_window_config(config_data: Dict) -> Dict:
    """Build the push-window section (nested under notification.push_window)."""
    push_window = config_data.get("notification", {}).get("push_window", {})
    time_range = push_window.get("time_range", {})
    # Boolean env overrides: None means unset, fall back to the file.
    enabled_override = _get_env_bool("PUSH_WINDOW_ENABLED")
    once_override = _get_env_bool("PUSH_WINDOW_ONCE_PER_DAY")
    if enabled_override is None:
        enabled_override = push_window.get("enabled", False)
    if once_override is None:
        once_override = push_window.get("once_per_day", True)
    return {
        "ENABLED": enabled_override,
        "TIME_RANGE": {
            "START": _get_env_str("PUSH_WINDOW_START") or time_range.get("start", "08:00"),
            "END": _get_env_str("PUSH_WINDOW_END") or time_range.get("end", "22:00"),
        },
        "ONCE_PER_DAY": once_override,
    }
def _load_weight_config(config_data: Dict) -> Dict:
"""加载权重配置"""
weight = config_data.get("weight", {})
return {
"RANK_WEIGHT": weight.get("rank_weight", 1.0),
"FREQUENCY_WEIGHT": weight.get("frequency_weight", 1.0),
"HOTNESS_WEIGHT": weight.get("hotness_weight", 1.0),
}
def _load_storage_config(config_data: Dict) -> Dict:
    """Build the storage section (backend, formats, local/remote, pull)."""
    section = config_data.get("storage", {})
    formats = section.get("formats", {})
    local = section.get("local", {})
    remote = section.get("remote", {})
    pull = section.get("pull", {})
    # Boolean env overrides: None means unset, fall back to the file.
    txt_override = _get_env_bool("STORAGE_TXT_ENABLED")
    html_override = _get_env_bool("STORAGE_HTML_ENABLED")
    pull_override = _get_env_bool("PULL_ENABLED")
    if txt_override is None:
        txt_override = formats.get("txt", True)
    if html_override is None:
        html_override = formats.get("html", True)
    if pull_override is None:
        pull_override = pull.get("enabled", False)
    return {
        "BACKEND": _get_env_str("STORAGE_BACKEND") or section.get("backend", "auto"),
        "FORMATS": {
            "SQLITE": formats.get("sqlite", True),
            "TXT": txt_override,
            "HTML": html_override,
        },
        "LOCAL": {
            "DATA_DIR": local.get("data_dir", "output"),
            "RETENTION_DAYS": _get_env_int("LOCAL_RETENTION_DAYS") or local.get("retention_days", 0),
        },
        # S3-compatible remote backend credentials; env vars take precedence.
        "REMOTE": {
            "ENDPOINT_URL": _get_env_str("S3_ENDPOINT_URL") or remote.get("endpoint_url", ""),
            "BUCKET_NAME": _get_env_str("S3_BUCKET_NAME") or remote.get("bucket_name", ""),
            "ACCESS_KEY_ID": _get_env_str("S3_ACCESS_KEY_ID") or remote.get("access_key_id", ""),
            "SECRET_ACCESS_KEY": _get_env_str("S3_SECRET_ACCESS_KEY") or remote.get("secret_access_key", ""),
            "REGION": _get_env_str("S3_REGION") or remote.get("region", ""),
            "RETENTION_DAYS": _get_env_int("REMOTE_RETENTION_DAYS") or remote.get("retention_days", 0),
        },
        "PULL": {
            "ENABLED": pull_override,
            "DAYS": _get_env_int("PULL_DAYS") or pull.get("days", 7),
        },
    }
def _load_webhook_config(config_data: Dict) -> Dict:
    """Build the webhook/credential map; each key prefers its env var."""
    webhooks = config_data.get("notification", {}).get("webhooks", {})

    def pick(env_key: str, cfg_key: str, fallback: str = "") -> str:
        # Environment variable first, then the YAML value, then the fallback.
        return _get_env_str(env_key) or webhooks.get(cfg_key, fallback)

    return {
        # 飞书
        "FEISHU_WEBHOOK_URL": pick("FEISHU_WEBHOOK_URL", "feishu_url"),
        # 钉钉
        "DINGTALK_WEBHOOK_URL": pick("DINGTALK_WEBHOOK_URL", "dingtalk_url"),
        # 企业微信
        "WEWORK_WEBHOOK_URL": pick("WEWORK_WEBHOOK_URL", "wework_url"),
        "WEWORK_MSG_TYPE": pick("WEWORK_MSG_TYPE", "wework_msg_type", "markdown"),
        # Telegram
        "TELEGRAM_BOT_TOKEN": pick("TELEGRAM_BOT_TOKEN", "telegram_bot_token"),
        "TELEGRAM_CHAT_ID": pick("TELEGRAM_CHAT_ID", "telegram_chat_id"),
        # 邮件
        "EMAIL_FROM": pick("EMAIL_FROM", "email_from"),
        "EMAIL_PASSWORD": pick("EMAIL_PASSWORD", "email_password"),
        "EMAIL_TO": pick("EMAIL_TO", "email_to"),
        "EMAIL_SMTP_SERVER": pick("EMAIL_SMTP_SERVER", "email_smtp_server"),
        "EMAIL_SMTP_PORT": pick("EMAIL_SMTP_PORT", "email_smtp_port"),
        # ntfy (has a hard default server URL)
        "NTFY_SERVER_URL": pick("NTFY_SERVER_URL", "ntfy_server_url") or "https://ntfy.sh",
        "NTFY_TOPIC": pick("NTFY_TOPIC", "ntfy_topic"),
        "NTFY_TOKEN": pick("NTFY_TOKEN", "ntfy_token"),
        # Bark
        "BARK_URL": pick("BARK_URL", "bark_url"),
        # Slack
        "SLACK_WEBHOOK_URL": pick("SLACK_WEBHOOK_URL", "slack_webhook_url"),
    }
def _print_notification_sources(config: Dict) -> None:
    """Log, per notification channel, where its settings came from.

    Pure logging helper: inspects the merged ``config`` and prints one
    entry per configured channel, noting whether the value came from an
    environment variable or the config file and how many accounts will be
    used (capped at MAX_ACCOUNTS_PER_CHANNEL).
    """
    notification_sources = []
    max_accounts = config["MAX_ACCOUNTS_PER_CHANNEL"]
    # Feishu (飞书): plain multi-account webhook list.
    if config["FEISHU_WEBHOOK_URL"]:
        accounts = parse_multi_account_config(config["FEISHU_WEBHOOK_URL"])
        count = min(len(accounts), max_accounts)
        source = "环境变量" if os.environ.get("FEISHU_WEBHOOK_URL") else "配置文件"
        notification_sources.append(f"飞书({source}, {count}个账号)")
    # DingTalk (钉钉).
    if config["DINGTALK_WEBHOOK_URL"]:
        accounts = parse_multi_account_config(config["DINGTALK_WEBHOOK_URL"])
        count = min(len(accounts), max_accounts)
        source = "环境变量" if os.environ.get("DINGTALK_WEBHOOK_URL") else "配置文件"
        notification_sources.append(f"钉钉({source}, {count}个账号)")
    # WeCom (企业微信).
    if config["WEWORK_WEBHOOK_URL"]:
        accounts = parse_multi_account_config(config["WEWORK_WEBHOOK_URL"])
        count = min(len(accounts), max_accounts)
        source = "环境变量" if os.environ.get("WEWORK_WEBHOOK_URL") else "配置文件"
        notification_sources.append(f"企业微信({source}, {count}个账号)")
    # Telegram needs a paired token/chat_id list of equal lengths.
    if config["TELEGRAM_BOT_TOKEN"] and config["TELEGRAM_CHAT_ID"]:
        tokens = parse_multi_account_config(config["TELEGRAM_BOT_TOKEN"])
        chat_ids = parse_multi_account_config(config["TELEGRAM_CHAT_ID"])
        valid, count = validate_paired_configs(
            {"bot_token": tokens, "chat_id": chat_ids},
            "Telegram",
            required_keys=["bot_token", "chat_id"]
        )
        if valid and count > 0:
            count = min(count, max_accounts)
            token_source = "环境变量" if os.environ.get("TELEGRAM_BOT_TOKEN") else "配置文件"
            notification_sources.append(f"Telegram({token_source}, {count}个账号)")
    # Email: single account, all three fields must be present.
    if config["EMAIL_FROM"] and config["EMAIL_PASSWORD"] and config["EMAIL_TO"]:
        from_source = "环境变量" if os.environ.get("EMAIL_FROM") else "配置文件"
        notification_sources.append(f"邮件({from_source})")
    # ntfy: topics optionally paired with tokens.
    if config["NTFY_SERVER_URL"] and config["NTFY_TOPIC"]:
        topics = parse_multi_account_config(config["NTFY_TOPIC"])
        tokens = parse_multi_account_config(config["NTFY_TOKEN"])
        if tokens:
            # With tokens configured, topic/token counts must line up.
            valid, count = validate_paired_configs(
                {"topic": topics, "token": tokens},
                "ntfy"
            )
            if valid and count > 0:
                count = min(count, max_accounts)
                server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
                notification_sources.append(f"ntfy({server_source}, {count}个账号)")
        else:
            # Token-less topics are allowed.
            count = min(len(topics), max_accounts)
            server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
            notification_sources.append(f"ntfy({server_source}, {count}个账号)")
    # Bark.
    if config["BARK_URL"]:
        accounts = parse_multi_account_config(config["BARK_URL"])
        count = min(len(accounts), max_accounts)
        bark_source = "环境变量" if os.environ.get("BARK_URL") else "配置文件"
        notification_sources.append(f"Bark({bark_source}, {count}个账号)")
    # Slack.
    if config["SLACK_WEBHOOK_URL"]:
        accounts = parse_multi_account_config(config["SLACK_WEBHOOK_URL"])
        count = min(len(accounts), max_accounts)
        slack_source = "环境变量" if os.environ.get("SLACK_WEBHOOK_URL") else "配置文件"
        notification_sources.append(f"Slack({slack_source}, {count}个账号)")
    if notification_sources:
        print(f"通知渠道配置来源: {', '.join(notification_sources)}")
        print(f"每个渠道最大账号数: {max_accounts}")
    else:
        print("未配置任何通知渠道")
def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
    """Load and merge the application configuration.

    Args:
        config_path: Path to the YAML config file. When None, falls back to
            the CONFIG_PATH environment variable, then "config/config.yaml".

    Returns:
        A single dict merging every configuration section.

    Raises:
        FileNotFoundError: If the config file does not exist.
    """
    if config_path is None:
        config_path = os.environ.get("CONFIG_PATH", "config/config.yaml")
    cfg_file = Path(config_path)
    if not cfg_file.exists():
        raise FileNotFoundError(f"配置文件 {config_path} 不存在")
    with cfg_file.open("r", encoding="utf-8") as f:
        raw = yaml.safe_load(f)
    print(f"配置文件加载成功: {config_path}")

    config: Dict[str, Any] = {}
    # Flat sections are merged in declaration order: app, crawler, report,
    # notification (later keys would win on collision).
    for section in (
        _load_app_config(raw),
        _load_crawler_config(raw),
        _load_report_config(raw),
        _load_notification_config(raw),
    ):
        config.update(section)
    # Structured sections are stored under dedicated keys.
    config["PUSH_WINDOW"] = _load_push_window_config(raw)
    config["WEIGHT_CONFIG"] = _load_weight_config(raw)
    config["PLATFORMS"] = raw.get("platforms", [])
    config["STORAGE"] = _load_storage_config(raw)
    # Webhook settings are merged last, matching the original precedence.
    config.update(_load_webhook_config(raw))
    # Log where each notification channel's credentials came from.
    _print_notification_sources(config)
    return config
+8
View File
@@ -0,0 +1,8 @@
# coding=utf-8
"""
爬虫模块 - 数据抓取功能
"""
from trendradar.crawler.fetcher import DataFetcher
__all__ = ["DataFetcher"]
+184
View File
@@ -0,0 +1,184 @@
# coding=utf-8
"""
数据获取器模块
负责从 NewsNow API 抓取新闻数据,支持:
- 单个平台数据获取
- 批量平台数据爬取
- 自动重试机制
- 代理支持
"""
import json
import random
import time
from typing import Dict, List, Tuple, Optional, Union
import requests
class DataFetcher:
    """Fetches trending-news data from the NewsNow API.

    Capabilities:
    - single-platform fetch with retry and randomized backoff
    - sequential batch crawling with jittered request spacing
    - optional HTTP(S) proxy support
    """

    # Default API endpoint
    DEFAULT_API_URL = "https://newsnow.busiyi.world/api/s"

    # Default request headers (browser-like UA)
    DEFAULT_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Connection": "keep-alive",
        "Cache-Control": "no-cache",
    }

    def __init__(
        self,
        proxy_url: Optional[str] = None,
        api_url: Optional[str] = None,
    ):
        """Initialize the fetcher.

        Args:
            proxy_url: Optional proxy URL, applied to both http and https.
            api_url: Optional API base URL; defaults to DEFAULT_API_URL.
        """
        self.proxy_url = proxy_url
        self.api_url = api_url or self.DEFAULT_API_URL

    def fetch_data(
        self,
        id_info: Union[str, Tuple[str, str]],
        max_retries: int = 2,
        min_retry_wait: int = 3,
        max_retry_wait: int = 5,
    ) -> Tuple[Optional[str], str, str]:
        """Fetch raw data for one platform, retrying on failure.

        Args:
            id_info: Platform id, or a (platform id, alias) tuple.
            max_retries: Maximum number of retries after the first attempt.
            min_retry_wait: Lower bound of the base retry delay (seconds).
            max_retry_wait: Upper bound of the base retry delay (seconds).

        Returns:
            (response text, platform id, alias); response text is None on failure.
        """
        if isinstance(id_info, tuple):
            platform_id, alias = id_info
        else:
            platform_id, alias = id_info, id_info

        request_url = f"{self.api_url}?id={platform_id}&latest"
        proxies = (
            {"http": self.proxy_url, "https": self.proxy_url}
            if self.proxy_url
            else None
        )

        for attempt in range(max_retries + 1):
            try:
                response = requests.get(
                    request_url,
                    proxies=proxies,
                    headers=self.DEFAULT_HEADERS,
                    timeout=10,
                )
                response.raise_for_status()
                body = response.text
                payload = json.loads(body)
                status = payload.get("status", "未知")
                if status not in ["success", "cache"]:
                    raise ValueError(f"响应状态异常: {status}")
                status_info = "最新数据" if status == "success" else "缓存数据"
                print(f"获取 {platform_id} 成功({status_info}")
                return body, platform_id, alias
            except Exception as e:
                if attempt < max_retries:
                    # Base wait plus an extra component that grows per retry.
                    wait_time = random.uniform(
                        min_retry_wait, max_retry_wait
                    ) + attempt * random.uniform(1, 2)
                    print(f"请求 {platform_id} 失败: {e}. {wait_time:.2f}秒后重试...")
                    time.sleep(wait_time)
                else:
                    print(f"请求 {platform_id} 失败: {e}")
        return None, platform_id, alias

    def crawl_websites(
        self,
        ids_list: List[Union[str, Tuple[str, str]]],
        request_interval: int = 100,
    ) -> Tuple[Dict, Dict, List]:
        """Crawl multiple platforms sequentially.

        Args:
            ids_list: Platform ids; each entry is an id string or an
                (id, alias) tuple.
            request_interval: Base delay between requests, in milliseconds.

        Returns:
            (results dict, id-to-name map, list of failed platform ids).
        """
        results = {}
        id_to_name = {}
        failed_ids = []
        last_index = len(ids_list) - 1

        for position, id_info in enumerate(ids_list):
            if isinstance(id_info, tuple):
                platform_id, display_name = id_info
            else:
                platform_id = id_info
                display_name = id_info
            id_to_name[platform_id] = display_name

            raw_response, _, _ = self.fetch_data(id_info)
            if raw_response:
                try:
                    payload = json.loads(raw_response)
                    site_titles = {}
                    results[platform_id] = site_titles
                    for rank, item in enumerate(payload.get("items", []), 1):
                        title = item.get("title")
                        # Skip invalid titles (None, numeric junk, blanks).
                        if title is None or isinstance(title, float):
                            continue
                        title = str(title).strip()
                        if not title:
                            continue
                        entry = site_titles.get(title)
                        if entry is not None:
                            # Duplicate title: record the additional rank.
                            entry["ranks"].append(rank)
                        else:
                            site_titles[title] = {
                                "ranks": [rank],
                                "url": item.get("url", ""),
                                "mobileUrl": item.get("mobileUrl", ""),
                            }
                except json.JSONDecodeError:
                    print(f"解析 {platform_id} 响应失败")
                    failed_ids.append(platform_id)
                except Exception as e:
                    print(f"处理 {platform_id} 数据出错: {e}")
                    failed_ids.append(platform_id)
            else:
                failed_ids.append(platform_id)

            # Jittered pause between requests (skipped after the final one),
            # floored at 50 ms.
            if position < last_index:
                jitter = request_interval + random.randint(-10, 20)
                time.sleep(max(50, jitter) / 1000)

        print(f"成功: {list(results.keys())}, 失败: {failed_ids}")
        return results, id_to_name, failed_ids
+81
View File
@@ -0,0 +1,81 @@
# coding=utf-8
"""
通知推送模块
提供多渠道通知推送功能,包括:
- 飞书、钉钉、企业微信
- Telegram、Slack
- Email、ntfy、Bark
模块结构:
- push_manager: 推送记录管理
- formatters: 内容格式转换
- batch: 批次处理工具
- renderer: 通知内容渲染
- splitter: 消息分批拆分
- senders: 消息发送器(各渠道发送函数)
- dispatcher: 多账号通知调度器
"""
from trendradar.notification.push_manager import PushRecordManager
from trendradar.notification.formatters import (
strip_markdown,
convert_markdown_to_mrkdwn,
)
from trendradar.notification.batch import (
get_batch_header,
get_max_batch_header_size,
truncate_to_bytes,
add_batch_headers,
)
from trendradar.notification.renderer import (
render_feishu_content,
render_dingtalk_content,
)
from trendradar.notification.splitter import (
split_content_into_batches,
DEFAULT_BATCH_SIZES,
)
from trendradar.notification.senders import (
send_to_feishu,
send_to_dingtalk,
send_to_wework,
send_to_telegram,
send_to_email,
send_to_ntfy,
send_to_bark,
send_to_slack,
SMTP_CONFIGS,
)
from trendradar.notification.dispatcher import NotificationDispatcher
__all__ = [
# 推送记录管理
"PushRecordManager",
# 格式转换
"strip_markdown",
"convert_markdown_to_mrkdwn",
# 批次处理
"get_batch_header",
"get_max_batch_header_size",
"truncate_to_bytes",
"add_batch_headers",
# 内容渲染
"render_feishu_content",
"render_dingtalk_content",
# 消息分批
"split_content_into_batches",
"DEFAULT_BATCH_SIZES",
# 消息发送器
"send_to_feishu",
"send_to_dingtalk",
"send_to_wework",
"send_to_telegram",
"send_to_email",
"send_to_ntfy",
"send_to_bark",
"send_to_slack",
"SMTP_CONFIGS",
# 通知调度器
"NotificationDispatcher",
]
+115
View File
@@ -0,0 +1,115 @@
# coding=utf-8
"""
批次处理模块
提供消息分批发送的辅助函数
"""
from typing import List
def get_batch_header(format_type: str, batch_num: int, total_batches: int) -> str:
"""根据 format_type 生成对应格式的批次头部
Args:
format_type: 推送类型(telegram, slack, wework_text, bark, feishu, dingtalk, ntfy, wework
batch_num: 当前批次编号
total_batches: 总批次数
Returns:
格式化的批次头部字符串
"""
if format_type == "telegram":
return f"<b>[第 {batch_num}/{total_batches} 批次]</b>\n\n"
elif format_type == "slack":
return f"*[第 {batch_num}/{total_batches} 批次]*\n\n"
elif format_type in ("wework_text", "bark"):
# 企业微信文本模式和 Bark 使用纯文本格式
return f"[第 {batch_num}/{total_batches} 批次]\n\n"
else:
# 飞书、钉钉、ntfy、企业微信 markdown 模式
return f"**[第 {batch_num}/{total_batches} 批次]**\n\n"
def get_max_batch_header_size(format_type: str) -> int:
    """Worst-case byte size of a batch header (assumes at most 99 batches).

    Reserving this space when splitting avoids having to truncate finished
    content afterwards, which could cut a message mid-item.

    Args:
        format_type: Push-channel format.

    Returns:
        Maximum header size in UTF-8 bytes.
    """
    worst_case_header = get_batch_header(format_type, 99, 99)
    return len(worst_case_header.encode("utf-8"))
def truncate_to_bytes(text: str, max_bytes: int) -> str:
    """Truncate *text* so its UTF-8 encoding fits in max_bytes.

    Never splits a multi-byte character: any partial trailing character
    produced by the byte-level cut is dropped.

    Args:
        text: Text to truncate.
        max_bytes: Maximum allowed size in bytes.

    Returns:
        The (possibly shortened) text.
    """
    encoded = text.encode("utf-8")
    if len(encoded) <= max_bytes:
        return text
    # errors="ignore" silently discards the incomplete trailing character
    # left over by cutting at an arbitrary byte offset.
    return encoded[:max_bytes].decode("utf-8", errors="ignore")
def add_batch_headers(
    batches: List[str], format_type: str, max_bytes: int
) -> List[str]:
    """Prefix every batch with a "[第 i/N 批次]" header, respecting max_bytes.

    The allowed content size is computed per batch from the actual header
    size, so header + content never exceeds the channel limit.

    Args:
        batches: Raw batch contents.
        format_type: Push-channel format (bark, telegram, feishu, ...).
        max_bytes: Byte limit for that channel.

    Returns:
        Batches with headers prepended (single-batch input is left untouched).
    """
    if len(batches) <= 1:
        return batches

    total = len(batches)
    labelled = []
    for index, body in enumerate(batches, 1):
        header = get_batch_header(format_type, index, total)
        header_bytes = len(header.encode("utf-8"))
        # Room left for the content once the header is accounted for.
        budget = max_bytes - header_bytes
        body_bytes = len(body.encode("utf-8"))
        if body_bytes > budget:
            print(
                f"警告:{format_type}{index}/{total} 批次内容({body_bytes}字节) + 头部({header_bytes}字节) 超出限制({max_bytes}字节),截断到 {budget} 字节"
            )
            body = truncate_to_bytes(body, budget)
        labelled.append(header + body)
    return labelled
+420
View File
@@ -0,0 +1,420 @@
# coding=utf-8
"""
通知调度器模块
提供统一的通知分发接口。
支持所有通知渠道的多账号配置,使用 `;` 分隔多个账号。
使用示例:
dispatcher = NotificationDispatcher(config, get_time_func, split_content_func)
results = dispatcher.dispatch_all(report_data, report_type, ...)
"""
from typing import Any, Callable, Dict, List, Optional
from trendradar.core.config import (
get_account_at_index,
limit_accounts,
parse_multi_account_config,
validate_paired_configs,
)
from .senders import (
send_to_bark,
send_to_dingtalk,
send_to_email,
send_to_feishu,
send_to_ntfy,
send_to_slack,
send_to_telegram,
send_to_wework,
)
class NotificationDispatcher:
    """
    Unified multi-account notification dispatcher.

    Wraps per-channel multi-account sending behind a single dispatch_all()
    entry point; account parsing, per-channel account limits and
    paired-config validation are handled internally. Multiple accounts in
    one config value are separated by ';'.
    """

    def __init__(
        self,
        config: Dict[str, Any],
        get_time_func: Callable,
        split_content_func: Callable,
    ):
        """
        Initialize the dispatcher.

        Args:
            config: Full configuration dict containing every channel's settings.
            get_time_func: Callable returning the current time.
            split_content_func: Callable that splits content into size-limited batches.
        """
        self.config = config
        self.get_time_func = get_time_func
        self.split_content_func = split_content_func
        # Cap on how many accounts per channel are actually used (default 3).
        self.max_accounts = config.get("MAX_ACCOUNTS_PER_CHANNEL", 3)

    def dispatch_all(
        self,
        report_data: Dict,
        report_type: str,
        update_info: Optional[Dict] = None,
        proxy_url: Optional[str] = None,
        mode: str = "daily",
        html_file_path: Optional[str] = None,
    ) -> Dict[str, bool]:
        """
        Dispatch the report to every configured channel.

        Args:
            report_data: Report data (produced by prepare_report_data).
            report_type: Report type label (e.g. daily summary, incremental).
            update_info: Optional version-update info.
            proxy_url: Optional proxy URL.
            mode: Report mode (daily/current/incremental).
            html_file_path: Path to the HTML report (used by email only).

        Returns:
            Dict[str, bool]: per-channel send result, keyed by channel name.
        """
        results = {}
        # Feishu
        if self.config.get("FEISHU_WEBHOOK_URL"):
            results["feishu"] = self._send_feishu(
                report_data, report_type, update_info, proxy_url, mode
            )
        # DingTalk
        if self.config.get("DINGTALK_WEBHOOK_URL"):
            results["dingtalk"] = self._send_dingtalk(
                report_data, report_type, update_info, proxy_url, mode
            )
        # WeCom (WeChat Work)
        if self.config.get("WEWORK_WEBHOOK_URL"):
            results["wework"] = self._send_wework(
                report_data, report_type, update_info, proxy_url, mode
            )
        # Telegram (requires paired bot_token / chat_id validation)
        if self.config.get("TELEGRAM_BOT_TOKEN") and self.config.get("TELEGRAM_CHAT_ID"):
            results["telegram"] = self._send_telegram(
                report_data, report_type, update_info, proxy_url, mode
            )
        # ntfy (requires paired topic / token validation)
        if self.config.get("NTFY_SERVER_URL") and self.config.get("NTFY_TOPIC"):
            results["ntfy"] = self._send_ntfy(
                report_data, report_type, update_info, proxy_url, mode
            )
        # Bark
        if self.config.get("BARK_URL"):
            results["bark"] = self._send_bark(
                report_data, report_type, update_info, proxy_url, mode
            )
        # Slack
        if self.config.get("SLACK_WEBHOOK_URL"):
            results["slack"] = self._send_slack(
                report_data, report_type, update_info, proxy_url, mode
            )
        # Email (original logic kept; already supports multiple recipients)
        if (
            self.config.get("EMAIL_FROM")
            and self.config.get("EMAIL_PASSWORD")
            and self.config.get("EMAIL_TO")
        ):
            results["email"] = self._send_email(report_type, html_file_path)
        return results

    def _send_to_multi_accounts(
        self,
        channel_name: str,
        config_value: str,
        send_func: Callable[..., bool],
        **kwargs,
    ) -> bool:
        """
        Generic multi-account send loop.

        Args:
            channel_name: Channel name (used in logs and limit messages).
            config_value: Raw config value; may hold several accounts separated by ';'.
            send_func: Sender with signature (account, account_label=..., **kwargs) -> bool.
            **kwargs: Extra arguments forwarded to send_func.

        Returns:
            bool: True if at least one account was sent successfully.
        """
        accounts = parse_multi_account_config(config_value)
        if not accounts:
            return False
        accounts = limit_accounts(accounts, self.max_accounts, channel_name)
        results = []
        for i, account in enumerate(accounts):
            if account:
                # Only label accounts when more than one is configured.
                account_label = f"账号{i+1}" if len(accounts) > 1 else ""
                result = send_func(account, account_label=account_label, **kwargs)
                results.append(result)
        return any(results) if results else False

    def _send_feishu(
        self,
        report_data: Dict,
        report_type: str,
        update_info: Optional[Dict],
        proxy_url: Optional[str],
        mode: str,
    ) -> bool:
        """Send to Feishu (multi-account)."""
        return self._send_to_multi_accounts(
            channel_name="飞书",
            config_value=self.config["FEISHU_WEBHOOK_URL"],
            send_func=lambda url, account_label: send_to_feishu(
                webhook_url=url,
                report_data=report_data,
                report_type=report_type,
                update_info=update_info,
                proxy_url=proxy_url,
                mode=mode,
                account_label=account_label,
                batch_size=self.config.get("FEISHU_BATCH_SIZE", 29000),
                batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
                split_content_func=self.split_content_func,
                get_time_func=self.get_time_func,
            ),
        )

    def _send_dingtalk(
        self,
        report_data: Dict,
        report_type: str,
        update_info: Optional[Dict],
        proxy_url: Optional[str],
        mode: str,
    ) -> bool:
        """Send to DingTalk (multi-account)."""
        return self._send_to_multi_accounts(
            channel_name="钉钉",
            config_value=self.config["DINGTALK_WEBHOOK_URL"],
            send_func=lambda url, account_label: send_to_dingtalk(
                webhook_url=url,
                report_data=report_data,
                report_type=report_type,
                update_info=update_info,
                proxy_url=proxy_url,
                mode=mode,
                account_label=account_label,
                batch_size=self.config.get("DINGTALK_BATCH_SIZE", 20000),
                batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
                split_content_func=self.split_content_func,
            ),
        )

    def _send_wework(
        self,
        report_data: Dict,
        report_type: str,
        update_info: Optional[Dict],
        proxy_url: Optional[str],
        mode: str,
    ) -> bool:
        """Send to WeCom / WeChat Work (multi-account)."""
        return self._send_to_multi_accounts(
            channel_name="企业微信",
            config_value=self.config["WEWORK_WEBHOOK_URL"],
            send_func=lambda url, account_label: send_to_wework(
                webhook_url=url,
                report_data=report_data,
                report_type=report_type,
                update_info=update_info,
                proxy_url=proxy_url,
                mode=mode,
                account_label=account_label,
                batch_size=self.config.get("MESSAGE_BATCH_SIZE", 4000),
                batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
                msg_type=self.config.get("WEWORK_MSG_TYPE", "markdown"),
                split_content_func=self.split_content_func,
            ),
        )

    def _send_telegram(
        self,
        report_data: Dict,
        report_type: str,
        update_info: Optional[Dict],
        proxy_url: Optional[str],
        mode: str,
    ) -> bool:
        """Send to Telegram (multi-account; bot_token/chat_id must pair up)."""
        telegram_tokens = parse_multi_account_config(self.config["TELEGRAM_BOT_TOKEN"])
        telegram_chat_ids = parse_multi_account_config(self.config["TELEGRAM_CHAT_ID"])
        if not telegram_tokens or not telegram_chat_ids:
            return False
        # Validate that tokens and chat ids pair up one-to-one.
        valid, count = validate_paired_configs(
            {"bot_token": telegram_tokens, "chat_id": telegram_chat_ids},
            "Telegram",
            required_keys=["bot_token", "chat_id"],
        )
        if not valid or count == 0:
            return False
        # Apply the per-channel account cap, keeping pairs aligned.
        telegram_tokens = limit_accounts(telegram_tokens, self.max_accounts, "Telegram")
        telegram_chat_ids = telegram_chat_ids[: len(telegram_tokens)]
        results = []
        for i in range(len(telegram_tokens)):
            token = telegram_tokens[i]
            chat_id = telegram_chat_ids[i]
            if token and chat_id:
                account_label = f"账号{i+1}" if len(telegram_tokens) > 1 else ""
                result = send_to_telegram(
                    bot_token=token,
                    chat_id=chat_id,
                    report_data=report_data,
                    report_type=report_type,
                    update_info=update_info,
                    proxy_url=proxy_url,
                    mode=mode,
                    account_label=account_label,
                    batch_size=self.config.get("MESSAGE_BATCH_SIZE", 4000),
                    batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
                    split_content_func=self.split_content_func,
                )
                results.append(result)
        return any(results) if results else False

    def _send_ntfy(
        self,
        report_data: Dict,
        report_type: str,
        update_info: Optional[Dict],
        proxy_url: Optional[str],
        mode: str,
    ) -> bool:
        """Send to ntfy (multi-account; topic/token must pair up when tokens are set)."""
        ntfy_server_url = self.config["NTFY_SERVER_URL"]
        ntfy_topics = parse_multi_account_config(self.config["NTFY_TOPIC"])
        ntfy_tokens = parse_multi_account_config(self.config.get("NTFY_TOKEN", ""))
        if not ntfy_server_url or not ntfy_topics:
            return False
        # Tokens are optional, but if present their count must match the topics.
        if ntfy_tokens and len(ntfy_tokens) != len(ntfy_topics):
            print(
                f"❌ ntfy 配置错误:topic 数量({len(ntfy_topics)})与 token 数量({len(ntfy_tokens)})不一致,跳过 ntfy 推送"
            )
            return False
        # Apply the per-channel account cap, keeping tokens aligned with topics.
        ntfy_topics = limit_accounts(ntfy_topics, self.max_accounts, "ntfy")
        if ntfy_tokens:
            ntfy_tokens = ntfy_tokens[: len(ntfy_topics)]
        results = []
        for i, topic in enumerate(ntfy_topics):
            if topic:
                token = get_account_at_index(ntfy_tokens, i, "") if ntfy_tokens else ""
                account_label = f"账号{i+1}" if len(ntfy_topics) > 1 else ""
                result = send_to_ntfy(
                    server_url=ntfy_server_url,
                    topic=topic,
                    token=token,
                    report_data=report_data,
                    report_type=report_type,
                    update_info=update_info,
                    proxy_url=proxy_url,
                    mode=mode,
                    account_label=account_label,
                    batch_size=3800,
                    split_content_func=self.split_content_func,
                )
                results.append(result)
        return any(results) if results else False

    def _send_bark(
        self,
        report_data: Dict,
        report_type: str,
        update_info: Optional[Dict],
        proxy_url: Optional[str],
        mode: str,
    ) -> bool:
        """Send to Bark (multi-account)."""
        return self._send_to_multi_accounts(
            channel_name="Bark",
            config_value=self.config["BARK_URL"],
            send_func=lambda url, account_label: send_to_bark(
                bark_url=url,
                report_data=report_data,
                report_type=report_type,
                update_info=update_info,
                proxy_url=proxy_url,
                mode=mode,
                account_label=account_label,
                batch_size=self.config.get("BARK_BATCH_SIZE", 3600),
                batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
                split_content_func=self.split_content_func,
            ),
        )

    def _send_slack(
        self,
        report_data: Dict,
        report_type: str,
        update_info: Optional[Dict],
        proxy_url: Optional[str],
        mode: str,
    ) -> bool:
        """Send to Slack (multi-account)."""
        return self._send_to_multi_accounts(
            channel_name="Slack",
            config_value=self.config["SLACK_WEBHOOK_URL"],
            send_func=lambda url, account_label: send_to_slack(
                webhook_url=url,
                report_data=report_data,
                report_type=report_type,
                update_info=update_info,
                proxy_url=proxy_url,
                mode=mode,
                account_label=account_label,
                batch_size=self.config.get("SLACK_BATCH_SIZE", 4000),
                batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
                split_content_func=self.split_content_func,
            ),
        )

    def _send_email(
        self,
        report_type: str,
        html_file_path: Optional[str],
    ) -> bool:
        """Send the HTML report by email (original logic; supports multiple recipients)."""
        return send_to_email(
            from_email=self.config["EMAIL_FROM"],
            password=self.config["EMAIL_PASSWORD"],
            to_email=self.config["EMAIL_TO"],
            report_type=report_type,
            html_file_path=html_file_path,
            custom_smtp_server=self.config.get("EMAIL_SMTP_SERVER", ""),
            custom_smtp_port=self.config.get("EMAIL_SMTP_PORT", ""),
            get_time_func=self.get_time_func,
        )
+80
View File
@@ -0,0 +1,80 @@
# coding=utf-8
"""
通知内容格式转换模块
提供不同推送平台间的格式转换功能
"""
import re
def strip_markdown(text: str) -> str:
    """Strip markdown syntax from *text*, leaving plain text (used for
    personal-WeChat pushes, which render no markup).

    Args:
        text: Markdown-formatted text.

    Returns:
        Plain-text content; link URLs are kept, image URLs are dropped.
    """
    # Bold **text** / __text__
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
    text = re.sub(r'__(.+?)__', r'\1', text)
    # Italic *text* / _text_
    text = re.sub(r'\*(.+?)\*', r'\1', text)
    text = re.sub(r'_(.+?)_', r'\1', text)
    # Strikethrough ~~text~~
    text = re.sub(r'~~(.+?)~~', r'\1', text)
    # Images ![alt](url) -> alt. BUG FIX: this must run BEFORE link handling —
    # the link pattern also matches the "[alt](url)" tail of an image, which
    # previously left a stray "!" and kept the image URL.
    text = re.sub(r'!\[(.+?)\]\(.+?\)', r'\1', text)
    # Links [text](url) -> text url (URL preserved)
    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1 \2', text)
    # Inline code `code`
    text = re.sub(r'`(.+?)`', r'\1', text)
    # Blockquote markers ">"
    text = re.sub(r'^>\s*', '', text, flags=re.MULTILINE)
    # Heading markers "#", "##", ...
    text = re.sub(r'^#+\s*', '', text, flags=re.MULTILINE)
    # Horizontal rules --- or ***
    text = re.sub(r'^[\-\*]{3,}\s*$', '', text, flags=re.MULTILINE)
    # HTML tags: <font color='x'>text</font> -> text, then any remaining tag
    text = re.sub(r'<font[^>]*>(.+?)</font>', r'\1', text)
    text = re.sub(r'<[^>]+>', '', text)
    # Collapse runs of 3+ newlines to a single blank line
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()
def convert_markdown_to_mrkdwn(content: str) -> str:
    """
    Convert standard Markdown to Slack's mrkdwn dialect.

    Rules applied:
        - [text](url) → <url|text>
        - **bold**    → *bold*
    Everything else (code blocks, lists, ...) passes through unchanged.

    Args:
        content: Markdown-formatted content.

    Returns:
        Content in Slack mrkdwn format.
    """
    # Links first, then bold, matching the original transformation order.
    link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
    bold_pattern = re.compile(r'\*\*([^*]+)\*\*')
    content = link_pattern.sub(r'<\2|\1>', content)
    return bold_pattern.sub(r'*\1*', content)
+109
View File
@@ -0,0 +1,109 @@
# coding=utf-8
"""
推送记录管理模块
管理推送记录,支持每日只推送一次和时间窗口控制
通过 storage_backend 统一存储,支持本地 SQLite 和远程云存储
"""
from datetime import datetime
from typing import Callable, Optional, Any
import pytz
class PushRecordManager:
    """
    Push-record manager: once-per-day guard and push-window checks.

    All records are persisted through the storage backend:
    - local runs: LocalStorageBackend (SQLite on disk)
    - GitHub Actions: RemoteStorageBackend (cloud storage)
    so the once_per_day feature also works on GitHub Actions.
    """

    def __init__(
        self,
        storage_backend: Any,
        get_time_func: Optional[Callable[[], datetime]] = None,
    ):
        """
        Initialize the manager.

        Args:
            storage_backend: Backend instance (LocalStorageBackend or RemoteStorageBackend).
            get_time_func: Clock function (should use the configured timezone).
        """
        self.storage_backend = storage_backend
        self.get_time = get_time_func or self._default_get_time
        print(f"[推送记录] 使用 {storage_backend.backend_name} 存储后端")

    def _default_get_time(self) -> datetime:
        """Fallback clock: current time in UTC+8 (Asia/Shanghai)."""
        return datetime.now(pytz.timezone("Asia/Shanghai"))

    def has_pushed_today(self) -> bool:
        """Return whether a push has already been recorded today."""
        return self.storage_backend.has_pushed_today()

    def record_push(self, report_type: str) -> bool:
        """Record a push of the given report type; True on success."""
        return self.storage_backend.record_push(report_type)

    def is_in_time_range(self, start_time: str, end_time: str) -> bool:
        """
        Return whether the current time lies within [start_time, end_time].

        Args:
            start_time: Window start, "HH:MM".
            end_time: Window end, "HH:MM".

        Returns:
            True when the current time is inside the window.
        """
        current = self.get_time().strftime("%H:%M")

        def normalize_time(time_str: str) -> str:
            """Normalize a time string to zero-padded HH:MM; echo input on error."""
            try:
                parts = time_str.strip().split(":")
                if len(parts) != 2:
                    raise ValueError(f"时间格式错误: {time_str}")
                hour, minute = int(parts[0]), int(parts[1])
                if not (0 <= hour <= 23 and 0 <= minute <= 59):
                    raise ValueError(f"时间范围错误: {time_str}")
                return f"{hour:02d}:{minute:02d}"
            except Exception as e:
                print(f"时间格式化错误 '{time_str}': {e}")
                return time_str

        window_start = normalize_time(start_time)
        window_end = normalize_time(end_time)
        now_hhmm = normalize_time(current)
        # Zero-padded HH:MM strings compare correctly lexicographically.
        inside = window_start <= now_hhmm <= window_end
        if not inside:
            print(f"时间窗口判断:当前 {now_hhmm},窗口 {window_start}-{window_end}")
        return inside
+260
View File
@@ -0,0 +1,260 @@
# coding=utf-8
"""
通知内容渲染模块
提供多平台通知内容渲染功能,生成格式化的推送消息
"""
from datetime import datetime
from typing import Dict, List, Optional, Callable
from trendradar.report.formatter import format_title_for_platform
def render_feishu_content(
    report_data: Dict,
    update_info: Optional[Dict] = None,
    mode: str = "daily",
    separator: str = "---",
    reverse_content_order: bool = False,
    get_time_func: Optional[Callable[[], datetime]] = None,
) -> str:
    """Render the Feishu notification body.

    Args:
        report_data: Report dict with keys stats, new_titles, failed_ids,
            total_new_count.
        update_info: Optional version-update info.
        mode: Report mode ("daily", "incremental", "current").
        separator: Section separator string.
        reverse_content_order: If True, put newly-added news before the
            keyword statistics.
        get_time_func: Optional clock function (defaults to datetime.now()).

    Returns:
        Formatted Feishu message content.
    """
    # Hot-keyword statistics section.
    stats_content = ""
    if report_data["stats"]:
        stats_content += "📊 **热点词汇统计**\n\n"
        total_count = len(report_data["stats"])
        for i, stat in enumerate(report_data["stats"]):
            word = stat["word"]
            count = stat["count"]
            sequence_display = f"<font color='grey'>[{i + 1}/{total_count}]</font>"
            # Emoji and color scale with how hot the keyword is.
            if count >= 10:
                stats_content += f"🔥 {sequence_display} **{word}** : <font color='red'>{count}</font> 条\n\n"
            elif count >= 5:
                stats_content += f"📈 {sequence_display} **{word}** : <font color='orange'>{count}</font> 条\n\n"
            else:
                stats_content += f"📌 {sequence_display} **{word}** : {count}\n\n"
            for j, title_data in enumerate(stat["titles"], 1):
                formatted_title = format_title_for_platform(
                    "feishu", title_data, show_source=True
                )
                stats_content += f" {j}. {formatted_title}\n"
                if j < len(stat["titles"]):
                    stats_content += "\n"
            if i < len(report_data["stats"]) - 1:
                stats_content += f"\n{separator}\n\n"
    # Newly-added news section.
    new_titles_content = ""
    if report_data["new_titles"]:
        new_titles_content += (
            f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
        )
        for source_data in report_data["new_titles"]:
            new_titles_content += (
                f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n"
            )
            for j, title_data in enumerate(source_data["titles"], 1):
                # Clear the is_new flag so titles are not re-marked as new here.
                title_data_copy = title_data.copy()
                title_data_copy["is_new"] = False
                formatted_title = format_title_for_platform(
                    "feishu", title_data_copy, show_source=False
                )
                new_titles_content += f" {j}. {formatted_title}\n"
            new_titles_content += "\n"
    # Assemble sections in the configured order.
    text_content = ""
    if reverse_content_order:
        # New items first, keyword statistics after.
        if new_titles_content:
            text_content += new_titles_content
            if stats_content:
                text_content += f"\n{separator}\n\n"
        if stats_content:
            text_content += stats_content
    else:
        # Default: keyword statistics first, new items after.
        if stats_content:
            text_content += stats_content
            if new_titles_content:
                text_content += f"\n{separator}\n\n"
        if new_titles_content:
            text_content += new_titles_content
    if not text_content:
        # Mode-specific placeholder when nothing matched.
        if mode == "incremental":
            mode_text = "增量模式下暂无新增匹配的热点词汇"
        elif mode == "current":
            mode_text = "当前榜单模式下暂无匹配的热点词汇"
        else:
            mode_text = "暂无匹配的热点词汇"
        text_content = f"📭 {mode_text}\n\n"
    if report_data["failed_ids"]:
        if text_content and "暂无匹配" not in text_content:
            text_content += f"\n{separator}\n\n"
        text_content += "⚠️ **数据获取失败的平台:**\n\n"
        for i, id_value in enumerate(report_data["failed_ids"], 1):
            text_content += f" • <font color='red'>{id_value}</font>\n"
    # Footer: timestamp and optional version-update notice.
    now = get_time_func() if get_time_func else datetime.now()
    text_content += (
        f"\n\n<font color='grey'>更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}</font>"
    )
    if update_info:
        text_content += f"\n<font color='grey'>TrendRadar 发现新版本 {update_info['remote_version']},当前 {update_info['current_version']}</font>"
    return text_content
def render_dingtalk_content(
    report_data: Dict,
    update_info: Optional[Dict] = None,
    mode: str = "daily",
    reverse_content_order: bool = False,
    get_time_func: Optional[Callable[[], datetime]] = None,
) -> str:
    """Render the DingTalk notification body.

    Args:
        report_data: Report dict with keys stats, new_titles, failed_ids,
            total_new_count.
        update_info: Optional version-update info.
        mode: Report mode ("daily", "incremental", "current").
        reverse_content_order: If True, put newly-added news before the
            keyword statistics.
        get_time_func: Optional clock function (defaults to datetime.now()).

    Returns:
        Formatted DingTalk message content.
    """
    # Count only titles under keywords that actually matched.
    total_titles = sum(
        len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
    )
    now = get_time_func() if get_time_func else datetime.now()
    # Header block with totals, timestamp and report type.
    header_content = f"**总新闻数:** {total_titles}\n\n"
    header_content += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
    header_content += "**类型:** 热点分析报告\n\n"
    header_content += "---\n\n"
    # Hot-keyword statistics section.
    stats_content = ""
    if report_data["stats"]:
        stats_content += "📊 **热点词汇统计**\n\n"
        total_count = len(report_data["stats"])
        for i, stat in enumerate(report_data["stats"]):
            word = stat["word"]
            count = stat["count"]
            sequence_display = f"[{i + 1}/{total_count}]"
            # Emoji scales with how hot the keyword is.
            if count >= 10:
                stats_content += f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
            elif count >= 5:
                stats_content += f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
            else:
                stats_content += f"📌 {sequence_display} **{word}** : {count}\n\n"
            for j, title_data in enumerate(stat["titles"], 1):
                formatted_title = format_title_for_platform(
                    "dingtalk", title_data, show_source=True
                )
                stats_content += f" {j}. {formatted_title}\n"
                if j < len(stat["titles"]):
                    stats_content += "\n"
            if i < len(report_data["stats"]) - 1:
                stats_content += "\n---\n\n"
    # Newly-added news section.
    new_titles_content = ""
    if report_data["new_titles"]:
        new_titles_content += (
            f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
        )
        for source_data in report_data["new_titles"]:
            new_titles_content += f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
            for j, title_data in enumerate(source_data["titles"], 1):
                # Clear the is_new flag so titles are not re-marked as new here.
                title_data_copy = title_data.copy()
                title_data_copy["is_new"] = False
                formatted_title = format_title_for_platform(
                    "dingtalk", title_data_copy, show_source=False
                )
                new_titles_content += f" {j}. {formatted_title}\n"
            new_titles_content += "\n"
    # Assemble sections in the configured order, after the fixed header.
    text_content = header_content
    if reverse_content_order:
        # New items first, keyword statistics after.
        if new_titles_content:
            text_content += new_titles_content
            if stats_content:
                text_content += "\n---\n\n"
        if stats_content:
            text_content += stats_content
    else:
        # Default: keyword statistics first, new items after.
        if stats_content:
            text_content += stats_content
            if new_titles_content:
                text_content += "\n---\n\n"
        if new_titles_content:
            text_content += new_titles_content
    if not stats_content and not new_titles_content:
        # Mode-specific placeholder when nothing matched.
        if mode == "incremental":
            mode_text = "增量模式下暂无新增匹配的热点词汇"
        elif mode == "current":
            mode_text = "当前榜单模式下暂无匹配的热点词汇"
        else:
            mode_text = "暂无匹配的热点词汇"
        text_content += f"📭 {mode_text}\n\n"
    if report_data["failed_ids"]:
        if "暂无匹配" not in text_content:
            text_content += "\n---\n\n"
        text_content += "⚠️ **数据获取失败的平台:**\n\n"
        for i, id_value in enumerate(report_data["failed_ids"], 1):
            text_content += f" • **{id_value}**\n"
    # Footer: timestamp and optional version-update notice.
    text_content += f"\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
    if update_info:
        text_content += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
    return text_content
File diff suppressed because it is too large Load Diff
+580
View File
@@ -0,0 +1,580 @@
# coding=utf-8
"""
消息分批处理模块
提供消息内容分批拆分功能,确保消息大小不超过各平台限制
"""
from datetime import datetime
from typing import Dict, List, Optional, Callable
from trendradar.report.formatter import format_title_for_platform
# 默认批次大小配置
DEFAULT_BATCH_SIZES = {
"dingtalk": 20000,
"feishu": 29000,
"ntfy": 3800,
"default": 4000,
}
def split_content_into_batches(
report_data: Dict,
format_type: str,
update_info: Optional[Dict] = None,
max_bytes: Optional[int] = None,
mode: str = "daily",
batch_sizes: Optional[Dict[str, int]] = None,
feishu_separator: str = "---",
reverse_content_order: bool = False,
get_time_func: Optional[Callable[[], datetime]] = None,
) -> List[str]:
"""分批处理消息内容,确保词组标题+至少第一条新闻的完整性
Args:
report_data: 报告数据字典,包含 stats, new_titles, failed_ids, total_new_count
format_type: 格式类型 (feishu, dingtalk, wework, telegram, ntfy, bark, slack)
update_info: 版本更新信息(可选)
max_bytes: 最大字节数(可选,如果不指定则使用默认配置)
mode: 报告模式 (daily, incremental, current)
batch_sizes: 批次大小配置字典(可选)
feishu_separator: 飞书消息分隔符
reverse_content_order: 是否反转内容顺序(新增在前)
get_time_func: 获取当前时间的函数(可选)
Returns:
分批后的消息内容列表
"""
# 合并批次大小配置
sizes = {**DEFAULT_BATCH_SIZES, **(batch_sizes or {})}
if max_bytes is None:
if format_type == "dingtalk":
max_bytes = sizes.get("dingtalk", 20000)
elif format_type == "feishu":
max_bytes = sizes.get("feishu", 29000)
elif format_type == "ntfy":
max_bytes = sizes.get("ntfy", 3800)
else:
max_bytes = sizes.get("default", 4000)
batches = []
total_titles = sum(
len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
)
now = get_time_func() if get_time_func else datetime.now()
base_header = ""
if format_type in ("wework", "bark"):
base_header = f"**总新闻数:** {total_titles}\n\n\n\n"
elif format_type == "telegram":
base_header = f"总新闻数: {total_titles}\n\n"
elif format_type == "ntfy":
base_header = f"**总新闻数:** {total_titles}\n\n"
elif format_type == "feishu":
base_header = ""
elif format_type == "dingtalk":
base_header = f"**总新闻数:** {total_titles}\n\n"
base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
base_header += f"**类型:** 热点分析报告\n\n"
base_header += "---\n\n"
elif format_type == "slack":
base_header = f"*总新闻数:* {total_titles}\n\n"
base_footer = ""
if format_type in ("wework", "bark"):
base_footer = f"\n\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
if update_info:
base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
elif format_type == "telegram":
base_footer = f"\n\n更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
if update_info:
base_footer += f"\nTrendRadar 发现新版本 {update_info['remote_version']},当前 {update_info['current_version']}"
elif format_type == "ntfy":
base_footer = f"\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
if update_info:
base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
elif format_type == "feishu":
base_footer = f"\n\n<font color='grey'>更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}</font>"
if update_info:
base_footer += f"\n<font color='grey'>TrendRadar 发现新版本 {update_info['remote_version']},当前 {update_info['current_version']}</font>"
elif format_type == "dingtalk":
base_footer = f"\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
if update_info:
base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
elif format_type == "slack":
base_footer = f"\n\n_更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}_"
if update_info:
base_footer += f"\n_TrendRadar 发现新版本 *{update_info['remote_version']}*,当前 *{update_info['current_version']}_"
stats_header = ""
if report_data["stats"]:
if format_type in ("wework", "bark"):
stats_header = f"📊 **热点词汇统计**\n\n"
elif format_type == "telegram":
stats_header = f"📊 热点词汇统计\n\n"
elif format_type == "ntfy":
stats_header = f"📊 **热点词汇统计**\n\n"
elif format_type == "feishu":
stats_header = f"📊 **热点词汇统计**\n\n"
elif format_type == "dingtalk":
stats_header = f"📊 **热点词汇统计**\n\n"
elif format_type == "slack":
stats_header = f"📊 *热点词汇统计*\n\n"
current_batch = base_header
current_batch_has_content = False
if (
not report_data["stats"]
and not report_data["new_titles"]
and not report_data["failed_ids"]
):
if mode == "incremental":
mode_text = "增量模式下暂无新增匹配的热点词汇"
elif mode == "current":
mode_text = "当前榜单模式下暂无匹配的热点词汇"
else:
mode_text = "暂无匹配的热点词汇"
simple_content = f"📭 {mode_text}\n\n"
final_content = base_header + simple_content + base_footer
batches.append(final_content)
return batches
# 定义处理热点词汇统计的函数
def process_stats_section(current_batch, current_batch_has_content, batches):
    """Append the keyword-frequency section to the outgoing batches.

    Enforces the byte budget: every emitted batch (plus ``base_footer``)
    must stay under ``max_bytes``. A keyword header and its first news
    line are handled atomically so a batch never ends with a dangling
    header.

    Args:
        current_batch: text accumulated for the batch being built
        current_batch_has_content: whether the batch holds real payload
        batches: finished batches (appended to in place)

    Returns:
        Updated ``(current_batch, current_batch_has_content, batches)``.
    """
    if not report_data["stats"]:
        return current_batch, current_batch_has_content, batches

    def _format_stat_title(title_data):
        # bark shares the wework renderer; unknown platforms fall back
        # to the raw title text.
        if format_type in ("wework", "bark"):
            return format_title_for_platform("wework", title_data, show_source=True)
        if format_type in ("telegram", "ntfy", "feishu", "dingtalk", "slack"):
            return format_title_for_platform(format_type, title_data, show_source=True)
        return f"{title_data['title']}"

    total_count = len(report_data["stats"])

    # Try to append the section header; flush and restart if it overflows.
    test_content = current_batch + stats_header
    if (
        len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
        < max_bytes
    ):
        current_batch = test_content
        current_batch_has_content = True
    else:
        if current_batch_has_content:
            batches.append(current_batch + base_footer)
        current_batch = base_header + stats_header
        current_batch_has_content = True

    for i, stat in enumerate(report_data["stats"]):
        word = stat["word"]
        count = stat["count"]
        sequence_display = f"[{i + 1}/{total_count}]"

        # Keyword headline; wework/bark/ntfy/dingtalk share the same
        # markdown shape, the others use platform-specific markup.
        word_header = ""
        if format_type in ("wework", "bark", "ntfy", "dingtalk"):
            if count >= 10:
                word_header = f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
            elif count >= 5:
                word_header = f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
            else:
                word_header = f"📌 {sequence_display} **{word}** : {count}\n\n"
        elif format_type == "telegram":
            if count >= 10:
                word_header = f"🔥 {sequence_display} {word} : {count}\n\n"
            elif count >= 5:
                word_header = f"📈 {sequence_display} {word} : {count}\n\n"
            else:
                word_header = f"📌 {sequence_display} {word} : {count}\n\n"
        elif format_type == "feishu":
            if count >= 10:
                word_header = f"🔥 <font color='grey'>{sequence_display}</font> **{word}** : <font color='red'>{count}</font> 条\n\n"
            elif count >= 5:
                word_header = f"📈 <font color='grey'>{sequence_display}</font> **{word}** : <font color='orange'>{count}</font> 条\n\n"
            else:
                word_header = f"📌 <font color='grey'>{sequence_display}</font> **{word}** : {count}\n\n"
        elif format_type == "slack":
            if count >= 10:
                word_header = f"🔥 {sequence_display} *{word}* : *{count}* 条\n\n"
            elif count >= 5:
                word_header = f"📈 {sequence_display} *{word}* : *{count}* 条\n\n"
            else:
                word_header = f"📌 {sequence_display} *{word}* : {count}\n\n"

        # First news line is glued to the keyword header (atomicity).
        first_news_line = ""
        if stat["titles"]:
            first_news_line = f" 1. {_format_stat_title(stat['titles'][0])}\n"
            if len(stat["titles"]) > 1:
                first_news_line += "\n"

        word_with_first_news = word_header + first_news_line
        test_content = current_batch + word_with_first_news
        if (
            len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
            >= max_bytes
        ):
            # Does not fit: flush and restart with header + first item.
            if current_batch_has_content:
                batches.append(current_batch + base_footer)
            current_batch = base_header + stats_header + word_with_first_news
        else:
            current_batch = test_content
        current_batch_has_content = True
        start_index = 1

        # Remaining news lines for this keyword.
        for j in range(start_index, len(stat["titles"])):
            news_line = f" {j + 1}. {_format_stat_title(stat['titles'][j])}\n"
            if j < len(stat["titles"]) - 1:
                news_line += "\n"
            test_content = current_batch + news_line
            if (
                len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
                >= max_bytes
            ):
                if current_batch_has_content:
                    batches.append(current_batch + base_footer)
                current_batch = base_header + stats_header + word_header + news_line
                current_batch_has_content = True
            else:
                current_batch = test_content
                current_batch_has_content = True

        # Separator between keywords; silently dropped if it would overflow.
        if i < len(report_data["stats"]) - 1:
            separator = ""
            if format_type in ("wework", "bark"):
                separator = "\n\n\n\n"
            elif format_type in ("telegram", "ntfy", "slack"):
                separator = "\n\n"
            elif format_type == "feishu":
                separator = f"\n{feishu_separator}\n\n"
            elif format_type == "dingtalk":
                separator = "\n---\n\n"
            test_content = current_batch + separator
            if (
                len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
                < max_bytes
            ):
                current_batch = test_content

    return current_batch, current_batch_has_content, batches
# 定义处理新增新闻的函数
def process_new_titles_section(current_batch, current_batch_has_content, batches):
    """Append the newly-appeared-news section to the outgoing batches.

    Same byte-budget rules as the stats section: a source header and its
    first news line are handled atomically.

    Fix: the per-title dispatch now also routes "ntfy" and "bark"
    through ``format_title_for_platform`` (previously ntfy always fell
    back to plain text, and bark did so for every item after the first),
    making this section consistent with the stats section.
    """
    if not report_data["new_titles"]:
        return current_batch, current_batch_has_content, batches

    def _format_new_title(title_data):
        # Render without the 🆕 marker (the section header already says
        # so) and without the source name (items are grouped by source).
        item = title_data.copy()
        item["is_new"] = False
        if format_type in ("wework", "bark"):
            return format_title_for_platform("wework", item, show_source=False)
        if format_type in ("telegram", "ntfy", "feishu", "dingtalk", "slack"):
            return format_title_for_platform(format_type, item, show_source=False)
        return f"{item['title']}"

    total_new = report_data["total_new_count"]
    new_header = ""
    if format_type in ("wework", "bark"):
        new_header = f"\n\n\n\n🆕 **本次新增热点新闻** (共 {total_new} 条)\n\n"
    elif format_type == "telegram":
        new_header = f"\n\n🆕 本次新增热点新闻 (共 {total_new} 条)\n\n"
    elif format_type == "ntfy":
        new_header = f"\n\n🆕 **本次新增热点新闻** (共 {total_new} 条)\n\n"
    elif format_type == "feishu":
        new_header = f"\n{feishu_separator}\n\n🆕 **本次新增热点新闻** (共 {total_new} 条)\n\n"
    elif format_type == "dingtalk":
        new_header = f"\n---\n\n🆕 **本次新增热点新闻** (共 {total_new} 条)\n\n"
    elif format_type == "slack":
        new_header = f"\n\n🆕 *本次新增热点新闻* (共 {total_new} 条)\n\n"

    # Section header, with overflow check.
    test_content = current_batch + new_header
    if (
        len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
        >= max_bytes
    ):
        if current_batch_has_content:
            batches.append(current_batch + base_footer)
        current_batch = base_header + new_header
    else:
        current_batch = test_content
    current_batch_has_content = True

    for source_data in report_data["new_titles"]:
        source_name = source_data["source_name"]
        item_count = len(source_data["titles"])
        if format_type == "telegram":
            source_header = f"{source_name} ({item_count} 条):\n\n"
        elif format_type == "slack":
            source_header = f"*{source_name}* ({item_count} 条):\n\n"
        elif format_type in ("wework", "bark", "ntfy", "feishu", "dingtalk"):
            source_header = f"**{source_name}** ({item_count} 条):\n\n"
        else:
            source_header = ""

        # First news line is glued to the source header (atomicity).
        first_news_line = ""
        if source_data["titles"]:
            first_news_line = f" 1. {_format_new_title(source_data['titles'][0])}\n"

        source_with_first_news = source_header + first_news_line
        test_content = current_batch + source_with_first_news
        if (
            len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
            >= max_bytes
        ):
            if current_batch_has_content:
                batches.append(current_batch + base_footer)
            current_batch = base_header + new_header + source_with_first_news
        else:
            current_batch = test_content
        current_batch_has_content = True
        start_index = 1

        # Remaining new titles for this source.
        for j in range(start_index, len(source_data["titles"])):
            news_line = f" {j + 1}. {_format_new_title(source_data['titles'][j])}\n"
            test_content = current_batch + news_line
            if (
                len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
                >= max_bytes
            ):
                if current_batch_has_content:
                    batches.append(current_batch + base_footer)
                current_batch = base_header + new_header + source_header + news_line
                current_batch_has_content = True
            else:
                current_batch = test_content
                current_batch_has_content = True

        current_batch += "\n"

    return current_batch, current_batch_has_content, batches
# 根据配置决定处理顺序
if reverse_content_order:
# 新增热点在前,热点词汇统计在后
current_batch, current_batch_has_content, batches = process_new_titles_section(
current_batch, current_batch_has_content, batches
)
current_batch, current_batch_has_content, batches = process_stats_section(
current_batch, current_batch_has_content, batches
)
else:
# 默认:热点词汇统计在前,新增热点在后
current_batch, current_batch_has_content, batches = process_stats_section(
current_batch, current_batch_has_content, batches
)
current_batch, current_batch_has_content, batches = process_new_titles_section(
current_batch, current_batch_has_content, batches
)
if report_data["failed_ids"]:
failed_header = ""
if format_type == "wework":
failed_header = f"\n\n\n\n⚠️ **数据获取失败的平台:**\n\n"
elif format_type == "telegram":
failed_header = f"\n\n⚠️ 数据获取失败的平台:\n\n"
elif format_type == "ntfy":
failed_header = f"\n\n⚠️ **数据获取失败的平台:**\n\n"
elif format_type == "feishu":
failed_header = f"\n{feishu_separator}\n\n⚠️ **数据获取失败的平台:**\n\n"
elif format_type == "dingtalk":
failed_header = f"\n---\n\n⚠️ **数据获取失败的平台:**\n\n"
test_content = current_batch + failed_header
if (
len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
>= max_bytes
):
if current_batch_has_content:
batches.append(current_batch + base_footer)
current_batch = base_header + failed_header
current_batch_has_content = True
else:
current_batch = test_content
current_batch_has_content = True
for i, id_value in enumerate(report_data["failed_ids"], 1):
if format_type == "feishu":
failed_line = f" • <font color='red'>{id_value}</font>\n"
elif format_type == "dingtalk":
failed_line = f" • **{id_value}**\n"
else:
failed_line = f"{id_value}\n"
test_content = current_batch + failed_line
if (
len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
>= max_bytes
):
if current_batch_has_content:
batches.append(current_batch + base_footer)
current_batch = base_header + failed_header + failed_line
current_batch_has_content = True
else:
current_batch = test_content
current_batch_has_content = True
# 完成最后批次
if current_batch_has_content:
batches.append(current_batch + base_footer)
return batches
+40
View File
@@ -0,0 +1,40 @@
# coding=utf-8
"""
报告生成模块
提供报告生成和格式化功能,包括:
- HTML 报告生成
- 标题格式化工具
模块结构:
- helpers: 报告辅助函数(清理、转义、格式化)
- formatter: 平台标题格式化
- html: HTML 报告渲染
- generator: 报告生成器
"""
from trendradar.report.helpers import (
clean_title,
html_escape,
format_rank_display,
)
from trendradar.report.formatter import format_title_for_platform
from trendradar.report.html import render_html_content
from trendradar.report.generator import (
prepare_report_data,
generate_html_report,
)
__all__ = [
# 辅助函数
"clean_title",
"html_escape",
"format_rank_display",
# 格式化函数
"format_title_for_platform",
# HTML 渲染
"render_html_content",
# 报告生成器
"prepare_report_data",
"generate_html_report",
]
+223
View File
@@ -0,0 +1,223 @@
# coding=utf-8
"""
平台标题格式化模块
提供多平台标题格式化功能
"""
from typing import Dict
from trendradar.report.helpers import clean_title, html_escape, format_rank_display
def format_title_for_platform(
    platform: str, title_data: Dict, show_source: bool = True
) -> str:
    """Format a single news title for the given target platform.

    Args:
        platform: target platform, one of:
            - "feishu": Feishu/Lark
            - "dingtalk": DingTalk
            - "wework": WeCom (Enterprise WeChat)
            - "bark": Bark (shares the WeCom markdown shape)
            - "telegram": Telegram (HTML parse mode)
            - "ntfy": ntfy
            - "slack": Slack (mrkdwn)
            - "html": HTML report
        title_data: title payload with fields:
            - title: title text
            - source_name: source display name
            - time_display: time label
            - count: number of appearances
            - ranks: rank list
            - rank_threshold: highlight threshold
            - url: desktop link
            - mobile_url: mobile link (preferred when present)
            - is_new: whether the title is newly appeared (optional)
        show_source: whether to prepend the source name

    Returns:
        The formatted title string; unknown platforms get the cleaned
        plain title.
    """
    rank_display = format_rank_display(
        title_data["ranks"], title_data["rank_threshold"], platform
    )
    # Mobile link wins over the desktop link.
    link_url = title_data["mobile_url"] or title_data["url"]
    cleaned_title = clean_title(title_data["title"])
    title_prefix = "🆕 " if title_data.get("is_new") else ""

    if platform == "feishu":
        formatted_title = (
            f"[{cleaned_title}]({link_url})" if link_url else cleaned_title
        )
        if show_source:
            result = f"<font color='grey'>[{title_data['source_name']}]</font> {title_prefix}{formatted_title}"
        else:
            result = f"{title_prefix}{formatted_title}"
        if rank_display:
            result += f" {rank_display}"
        if title_data["time_display"]:
            result += f" <font color='grey'>- {title_data['time_display']}</font>"
        if title_data["count"] > 1:
            result += f" <font color='green'>({title_data['count']}次)</font>"
        return result

    if platform in ("dingtalk", "wework", "bark"):
        # These three share an identical plain-markdown layout.
        formatted_title = (
            f"[{cleaned_title}]({link_url})" if link_url else cleaned_title
        )
        if show_source:
            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
        else:
            result = f"{title_prefix}{formatted_title}"
        if rank_display:
            result += f" {rank_display}"
        if title_data["time_display"]:
            result += f" - {title_data['time_display']}"
        if title_data["count"] > 1:
            result += f" ({title_data['count']}次)"
        return result

    if platform == "telegram":
        if link_url:
            formatted_title = f'<a href="{link_url}">{html_escape(cleaned_title)}</a>'
        else:
            # BUGFIX: the unlinked title must be escaped too — Telegram
            # HTML parse mode chokes on raw <, >, & in the text.
            formatted_title = html_escape(cleaned_title)
        if show_source:
            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
        else:
            result = f"{title_prefix}{formatted_title}"
        if rank_display:
            result += f" {rank_display}"
        if title_data["time_display"]:
            result += f" <code>- {title_data['time_display']}</code>"
        if title_data["count"] > 1:
            result += f" <code>({title_data['count']}次)</code>"
        return result

    if platform == "ntfy":
        formatted_title = (
            f"[{cleaned_title}]({link_url})" if link_url else cleaned_title
        )
        if show_source:
            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
        else:
            result = f"{title_prefix}{formatted_title}"
        if rank_display:
            result += f" {rank_display}"
        if title_data["time_display"]:
            result += f" `- {title_data['time_display']}`"
        if title_data["count"] > 1:
            result += f" `({title_data['count']}次)`"
        return result

    if platform == "slack":
        # Slack mrkdwn link syntax: <url|text>.
        formatted_title = f"<{link_url}|{cleaned_title}>" if link_url else cleaned_title
        if show_source:
            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
        else:
            result = f"{title_prefix}{formatted_title}"
        if rank_display:
            result += f" {rank_display}"
        if title_data["time_display"]:
            result += f" `- {title_data['time_display']}`"
        if title_data["count"] > 1:
            result += f" `({title_data['count']}次)`"
        return result

    if platform == "html":
        escaped_title = html_escape(cleaned_title)
        escaped_source_name = html_escape(title_data["source_name"])
        if link_url:
            escaped_url = html_escape(link_url)
            formatted_title = f'[{escaped_source_name}] <a href="{escaped_url}" target="_blank" class="news-link">{escaped_title}</a>'
        else:
            formatted_title = (
                f'[{escaped_source_name}] <span class="no-link">{escaped_title}</span>'
            )
        if rank_display:
            formatted_title += f" {rank_display}"
        if title_data["time_display"]:
            escaped_time = html_escape(title_data["time_display"])
            formatted_title += f" <font color='grey'>- {escaped_time}</font>"
        if title_data["count"] > 1:
            formatted_title += f" <font color='green'>({title_data['count']}次)</font>"
        if title_data.get("is_new"):
            formatted_title = f"<div class='new-title'>🆕 {formatted_title}</div>"
        return formatted_title

    return cleaned_title
+235
View File
@@ -0,0 +1,235 @@
# coding=utf-8
"""
报告生成模块
提供报告数据准备和 HTML 生成功能:
- prepare_report_data: 准备报告数据
- generate_html_report: 生成 HTML 报告
"""
from pathlib import Path
from typing import Dict, List, Optional, Callable
def prepare_report_data(
    stats: List[Dict],
    failed_ids: Optional[List] = None,
    new_titles: Optional[Dict] = None,
    id_to_name: Optional[Dict] = None,
    mode: str = "daily",
    rank_threshold: int = 3,
    matches_word_groups_func: Optional[Callable] = None,
    load_frequency_words_func: Optional[Callable] = None,
) -> Dict:
    """Normalize raw statistics into the structure the renderers expect.

    Args:
        stats: per-keyword statistics (entries with count <= 0 are dropped)
        failed_ids: source ids whose crawl failed
        new_titles: newly appeared titles, {source_id: {title: payload}}
        id_to_name: source id -> display name mapping
        mode: report mode (daily/incremental/current); incremental mode
            suppresses the new-titles section entirely
        rank_threshold: highlight threshold copied onto every entry
        matches_word_groups_func: optional predicate to filter new titles
        load_frequency_words_func: optional loader returning
            (word_groups, filter_words, global_filters) for the predicate

    Returns:
        Dict with keys "stats", "new_titles", "failed_ids",
        "total_new_count".
    """
    processed_new_titles = []

    # Incremental reports hide the dedicated new-titles block.
    if mode != "incremental":
        kept_new_titles = {}
        if new_titles and id_to_name:
            if matches_word_groups_func and load_frequency_words_func:
                # Filter new titles through the frequency-word matcher.
                word_groups, filter_words, global_filters = (
                    load_frequency_words_func()
                )
                for source_id, titles_data in new_titles.items():
                    kept = {
                        title: payload
                        for title, payload in titles_data.items()
                        if matches_word_groups_func(
                            title, word_groups, filter_words, global_filters
                        )
                    }
                    if kept:
                        kept_new_titles[source_id] = kept
            else:
                # No matcher supplied: keep everything.
                kept_new_titles = new_titles

        # Report the post-filter count (matches what gets pushed).
        raw_count = (
            sum(len(titles) for titles in new_titles.values()) if new_titles else 0
        )
        kept_count = (
            sum(len(titles) for titles in kept_new_titles.values())
            if kept_new_titles
            else 0
        )
        if raw_count > 0:
            print(f"频率词过滤后:{kept_count} 条新增热点匹配(原始 {raw_count} 条)")

        if kept_new_titles and id_to_name:
            for source_id, titles_data in kept_new_titles.items():
                source_name = id_to_name.get(source_id, source_id)
                entries = [
                    {
                        "title": title,
                        "source_name": source_name,
                        "time_display": "",
                        "count": 1,
                        "ranks": payload.get("ranks", []),
                        "rank_threshold": rank_threshold,
                        "url": payload.get("url", ""),
                        "mobile_url": payload.get("mobileUrl", ""),
                        "is_new": True,
                    }
                    for title, payload in titles_data.items()
                ]
                if entries:
                    processed_new_titles.append(
                        {
                            "source_id": source_id,
                            "source_name": source_name,
                            "titles": entries,
                        }
                    )

    # Keyword statistics: drop empty groups, normalize each title entry.
    processed_stats = [
        {
            "word": stat["word"],
            "count": stat["count"],
            "percentage": stat.get("percentage", 0),
            "titles": [
                {
                    "title": td["title"],
                    "source_name": td["source_name"],
                    "time_display": td["time_display"],
                    "count": td["count"],
                    "ranks": td["ranks"],
                    "rank_threshold": td["rank_threshold"],
                    "url": td.get("url", ""),
                    "mobile_url": td.get("mobileUrl", ""),
                    "is_new": td.get("is_new", False),
                }
                for td in stat["titles"]
            ],
        }
        for stat in stats
        if stat["count"] > 0
    ]

    return {
        "stats": processed_stats,
        "new_titles": processed_new_titles,
        "failed_ids": failed_ids or [],
        "total_new_count": sum(
            len(source["titles"]) for source in processed_new_titles
        ),
    }
def generate_html_report(
    stats: List[Dict],
    total_titles: int,
    failed_ids: Optional[List] = None,
    new_titles: Optional[Dict] = None,
    id_to_name: Optional[Dict] = None,
    mode: str = "daily",
    is_daily_summary: bool = False,
    update_info: Optional[Dict] = None,
    rank_threshold: int = 3,
    output_dir: str = "output",
    date_folder: str = "",
    time_filename: str = "",
    render_html_func: Optional[Callable] = None,
    matches_word_groups_func: Optional[Callable] = None,
    load_frequency_words_func: Optional[Callable] = None,
    enable_index_copy: bool = True,
) -> str:
    """Render an HTML report to disk and return its file path.

    Args:
        stats: statistics list
        total_titles: total number of titles crawled
        failed_ids: source ids whose crawl failed
        new_titles: newly appeared titles
        id_to_name: source id -> display name mapping
        mode: report mode (daily/incremental/current)
        is_daily_summary: whether this is a daily summary report
        update_info: version-update info for the footer
        rank_threshold: rank highlight threshold
        output_dir: output root directory
        date_folder: date folder name under the output root
        time_filename: time-based filename (non-summary reports)
        render_html_func: HTML renderer callable
        matches_word_groups_func: keyword matcher callable
        load_frequency_words_func: frequency-word loader callable
        enable_index_copy: also write index.html copies for summaries

    Returns:
        str: path of the generated HTML file.
    """
    # Summary reports get a fixed name per mode; others use the timestamp.
    if is_daily_summary:
        summary_names = {
            "current": "当前榜单汇总.html",
            "incremental": "当日增量.html",
        }
        filename = summary_names.get(mode, "当日汇总.html")
    else:
        filename = f"{time_filename}.html"

    html_dir = Path(output_dir) / date_folder / "html"
    html_dir.mkdir(parents=True, exist_ok=True)
    file_path = str(html_dir / filename)

    report_data = prepare_report_data(
        stats,
        failed_ids,
        new_titles,
        id_to_name,
        mode,
        rank_threshold,
        matches_word_groups_func,
        load_frequency_words_func,
    )

    if render_html_func:
        html_content = render_html_func(
            report_data, total_titles, is_daily_summary, mode, update_info
        )
    else:
        # Fallback: bare-bones dump of the prepared data.
        html_content = (
            f"<html><body><h1>Report</h1><pre>{report_data}</pre></body></html>"
        )

    Path(file_path).write_text(html_content, encoding="utf-8")

    if is_daily_summary and enable_index_copy:
        # Copy to the repo root (served by GitHub Pages)...
        Path("index.html").write_text(html_content, encoding="utf-8")
        # ...and into the output dir (for Docker volume mounts).
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        (Path(output_dir) / "index.html").write_text(html_content, encoding="utf-8")

    return file_path
+125
View File
@@ -0,0 +1,125 @@
# coding=utf-8
"""
报告辅助函数模块
提供报告生成相关的通用辅助函数
"""
import re
from typing import List
def clean_title(title: str) -> str:
    """Normalize a title to a single-line, single-spaced string.

    Non-string input is coerced via ``str``. All whitespace runs
    (including newlines and carriage returns) collapse to one space and
    leading/trailing whitespace is removed.

    Args:
        title: raw title value

    Returns:
        The cleaned title string.
    """
    text = title if isinstance(title, str) else str(title)
    # split() with no argument splits on any whitespace run and drops
    # edge whitespace, so the join yields the fully collapsed form.
    return " ".join(text.split())
def html_escape(text: str) -> str:
    """Escape HTML special characters.

    Delegates to the stdlib :func:`html.escape`, which performs exactly
    the same five substitutions as the previous hand-rolled chain, with
    ``&`` replaced first to avoid double-escaping:
    & -> &amp;, < -> &lt;, > -> &gt;, " -> &quot;, ' -> &#x27;

    Args:
        text: raw text (coerced via ``str`` if not already a string)

    Returns:
        The escaped text.
    """
    import html  # function-scope import keeps module imports unchanged

    if not isinstance(text, str):
        text = str(text)
    return html.escape(text, quote=True)
def format_rank_display(ranks: List[int], rank_threshold: int, format_type: str) -> str:
    """Render a rank (or rank range) string for the given platform.

    The span is highlighted when the best (smallest) rank is within
    ``rank_threshold``; the highlight markup depends on the platform.

    Args:
        ranks: rank list (duplicates allowed)
        rank_threshold: highlight when the minimum rank is <= this value
        format_type: platform type — "html", "feishu", "telegram",
            "slack", "dingtalk", "wework", or anything else for the
            default markdown bold

    Returns:
        A string like "[1]" or "[1 - 5]", optionally wrapped in
        highlight markup; "" when ``ranks`` is empty.
    """
    if not ranks:
        return ""

    ordered = sorted(set(ranks))
    best, worst = ordered[0], ordered[-1]
    span = f"[{best}]" if best == worst else f"[{best} - {worst}]"

    if best > rank_threshold:
        return span

    # Platform-specific highlight delimiters; dingtalk/wework and any
    # unknown platform fall back to plain markdown bold.
    highlight = {
        "html": ("<font color='red'><strong>", "</strong></font>"),
        "feishu": ("<font color='red'>**", "**</font>"),
        "telegram": ("<b>", "</b>"),
        "slack": ("*", "*"),
    }
    prefix, suffix = highlight.get(format_type, ("**", "**"))
    return f"{prefix}{span}{suffix}"
File diff suppressed because it is too large Load Diff
+44
View File
@@ -0,0 +1,44 @@
# coding=utf-8
"""
存储模块 - 支持多种存储后端
支持的存储后端:
- local: 本地 SQLite + TXT/HTML 文件
- remote: 远程云存储(S3 兼容协议:R2/OSS/COS/S3 等)
- auto: 根据环境自动选择(GitHub Actions 用 remote,其他用 local
"""
from trendradar.storage.base import (
StorageBackend,
NewsItem,
NewsData,
convert_crawl_results_to_news_data,
convert_news_data_to_results,
)
from trendradar.storage.local import LocalStorageBackend
from trendradar.storage.manager import StorageManager, get_storage_manager
# 远程后端可选导入(需要 boto3)
try:
from trendradar.storage.remote import RemoteStorageBackend
HAS_REMOTE = True
except ImportError:
RemoteStorageBackend = None
HAS_REMOTE = False
__all__ = [
# 基础类
"StorageBackend",
"NewsItem",
"NewsData",
# 转换函数
"convert_crawl_results_to_news_data",
"convert_news_data_to_results",
# 后端实现
"LocalStorageBackend",
"RemoteStorageBackend",
"HAS_REMOTE",
# 管理器
"StorageManager",
"get_storage_manager",
]
+457
View File
@@ -0,0 +1,457 @@
# coding=utf-8
"""
存储后端抽象基类和数据模型
定义统一的存储接口,所有存储后端都需要实现这些方法
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Optional, Any
import json
@dataclass
class NewsItem:
    """A single trending-news entry.

    The keys produced by :meth:`to_dict` are exactly the attribute
    names, so serialization round-trips through :meth:`from_dict`.
    """

    title: str  # news headline
    source_id: str  # source platform id (e.g. toutiao, baidu)
    source_name: str = ""  # display name, runtime-only (not stored in the DB)
    rank: int = 0  # rank on the board
    url: str = ""  # desktop link URL
    mobile_url: str = ""  # mobile link URL
    crawl_time: str = ""  # crawl time (HH:MM)

    # Aggregated statistics used by the analysis step
    ranks: List[int] = field(default_factory=list)  # historical ranks
    first_time: str = ""  # first time seen
    last_time: str = ""  # last time seen
    count: int = 1  # number of appearances

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict (keys match attribute names)."""
        keys = (
            "title", "source_id", "source_name", "rank", "url",
            "mobile_url", "crawl_time", "ranks", "first_time",
            "last_time", "count",
        )
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "NewsItem":
        """Build an instance from a dict; missing keys use defaults."""
        # The defaults mapping is rebuilt per call, so the empty ranks
        # list is never shared between instances.
        defaults = {
            "title": "",
            "source_id": "",
            "source_name": "",
            "rank": 0,
            "url": "",
            "mobile_url": "",
            "crawl_time": "",
            "ranks": [],
            "first_time": "",
            "last_time": "",
            "count": 1,
        }
        return cls(
            **{key: data.get(key, fallback) for key, fallback in defaults.items()}
        )
@dataclass
class NewsData:
    """A collection of news items from one crawl (or a merge of crawls).

    Structure:
        - date: date string (YYYY-MM-DD)
        - crawl_time: crawl time label (HH时MM分)
        - items: news entries grouped by source id
        - id_to_name: source id -> display name mapping
        - failed_ids: source ids whose crawl failed
    """
    date: str  # date (YYYY-MM-DD)
    crawl_time: str  # crawl time label
    items: Dict[str, List[NewsItem]]  # news grouped by source id
    id_to_name: Dict[str, str] = field(default_factory=dict)  # id -> display name
    failed_ids: List[str] = field(default_factory=list)  # failed source ids
    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; nested NewsItems serialized too."""
        items_dict = {}
        for source_id, news_list in self.items.items():
            items_dict[source_id] = [item.to_dict() for item in news_list]
        return {
            "date": self.date,
            "crawl_time": self.crawl_time,
            "items": items_dict,
            "id_to_name": self.id_to_name,
            "failed_ids": self.failed_ids,
        }
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "NewsData":
        """Build an instance from a dict produced by :meth:`to_dict`.

        Missing keys fall back to empty defaults.
        """
        items = {}
        items_data = data.get("items", {})
        for source_id, news_list in items_data.items():
            items[source_id] = [NewsItem.from_dict(item) for item in news_list]
        return cls(
            date=data.get("date", ""),
            crawl_time=data.get("crawl_time", ""),
            items=items,
            id_to_name=data.get("id_to_name", {}),
            failed_ids=data.get("failed_ids", []),
        )
    def get_total_count(self) -> int:
        """Total number of news items across all sources."""
        return sum(len(news_list) for news_list in self.items.values())
    def merge_with(self, other: "NewsData") -> "NewsData":
        """Merge ``other`` into this data set and return the result.

        Merge rules:
            - entries with the same source_id + title merge rank history
            - last_time and count are updated
            - the earlier first_time is kept

        NOTE(review): matching ``NewsItem`` objects held by ``self.items``
        are mutated in place and shared with the returned NewsData —
        callers must not rely on ``self`` being unchanged after a merge.
        """
        merged_items = {}
        # Index current items by title; values are the same NewsItem
        # objects as in self.items (no copies are made).
        for source_id, news_list in self.items.items():
            merged_items[source_id] = {item.title: item for item in news_list}
        # Fold in the other data set.
        for source_id, news_list in other.items.items():
            if source_id not in merged_items:
                merged_items[source_id] = {}
            for item in news_list:
                if item.title in merged_items[source_id]:
                    # Merge into the existing entry (in-place mutation).
                    existing = merged_items[source_id][item.title]
                    # Union of rank histories, sorted ascending.
                    existing_ranks = set(existing.ranks) if existing.ranks else set()
                    new_ranks = set(item.ranks) if item.ranks else set()
                    merged_ranks = sorted(existing_ranks | new_ranks)
                    existing.ranks = merged_ranks
                    # Keep the earliest first_time / latest last_time
                    # (string comparison; assumes a sortable time format).
                    if item.first_time and (not existing.first_time or item.first_time < existing.first_time):
                        existing.first_time = item.first_time
                    if item.last_time and (not existing.last_time or item.last_time > existing.last_time):
                        existing.last_time = item.last_time
                    # NOTE(review): adds 1 rather than item.count — fine
                    # when merging single-crawl snapshots (count == 1);
                    # confirm before merging pre-aggregated data.
                    existing.count += 1
                    # Backfill URLs only when missing on the existing entry.
                    if not existing.url and item.url:
                        existing.url = item.url
                    if not existing.mobile_url and item.mobile_url:
                        existing.mobile_url = item.mobile_url
                else:
                    # Unseen title: adopt the item object as-is.
                    merged_items[source_id][item.title] = item
        # Convert back to the list-per-source representation.
        final_items = {}
        for source_id, items_dict in merged_items.items():
            final_items[source_id] = list(items_dict.values())
        # Later mapping (other) wins on id_to_name conflicts.
        merged_id_to_name = {**self.id_to_name, **other.id_to_name}
        # De-duplicated union of failures (order not preserved).
        merged_failed_ids = list(set(self.failed_ids + other.failed_ids))
        return NewsData(
            date=self.date or other.date,
            crawl_time=other.crawl_time,  # use the newer crawl time
            items=final_items,
            id_to_name=merged_id_to_name,
            failed_ids=merged_failed_ids,
        )
class StorageBackend(ABC):
    """
    Abstract base class for storage backends.

    Every backend must implement these methods to support:
    - saving news data
    - reading all of today's data
    - detecting newly-appeared news
    - generating report files (TXT/HTML)
    """
    @abstractmethod
    def save_news_data(self, data: NewsData) -> bool:
        """
        Save news data.

        Args:
            data: the news data

        Returns:
            True on success.
        """
        pass
    @abstractmethod
    def get_today_all_data(self, date: Optional[str] = None) -> Optional[NewsData]:
        """
        Get all news data stored for a date.

        Args:
            date: date string (YYYY-MM-DD), defaults to today

        Returns:
            The merged news data, or None when nothing is stored.
        """
        pass
    @abstractmethod
    def get_latest_crawl_data(self, date: Optional[str] = None) -> Optional[NewsData]:
        """
        Get the data of the most recent crawl.

        Args:
            date: date string, defaults to today

        Returns:
            The latest crawl's news data.
        """
        pass
    @abstractmethod
    def detect_new_titles(self, current_data: NewsData) -> Dict[str, Dict]:
        """
        Detect newly-appeared titles.

        Args:
            current_data: the data just crawled

        Returns:
            New titles, shaped {source_id: {title: title_data}}.
        """
        pass
    @abstractmethod
    def save_txt_snapshot(self, data: NewsData) -> Optional[str]:
        """
        Save a TXT snapshot (optional; local environments only).

        Args:
            data: the news data

        Returns:
            The saved file path, or None when unsupported.
        """
        pass
    @abstractmethod
    def save_html_report(self, html_content: str, filename: str, is_summary: bool = False) -> Optional[str]:
        """
        Save an HTML report.

        Args:
            html_content: the HTML body
            filename: target file name
            is_summary: whether this is a summary report

        Returns:
            The saved file path.
        """
        pass
    @abstractmethod
    def is_first_crawl_today(self, date: Optional[str] = None) -> bool:
        """
        Check whether this is the first crawl of the day.

        Args:
            date: date string, defaults to today

        Returns:
            True when it is the first crawl.
        """
        pass
    @abstractmethod
    def cleanup(self) -> None:
        """
        Release resources (temp files, database connections, ...).
        """
        pass
    @abstractmethod
    def cleanup_old_data(self, retention_days: int) -> int:
        """
        Delete expired data.

        Args:
            retention_days: days to keep (0 disables cleanup)

        Returns:
            Number of date directories removed.
        """
        pass
    @property
    @abstractmethod
    def backend_name(self) -> str:
        """
        Name identifying this storage backend.
        """
        pass
    @property
    @abstractmethod
    def supports_txt(self) -> bool:
        """
        Whether TXT snapshots are supported.
        """
        pass
    # === Push-record methods ===
    @abstractmethod
    def has_pushed_today(self, date: Optional[str] = None) -> bool:
        """
        Check whether a push already happened on the given date.

        Args:
            date: date string (YYYY-MM-DD), defaults to today

        Returns:
            True when already pushed.
        """
        pass
    @abstractmethod
    def record_push(self, report_type: str, date: Optional[str] = None) -> bool:
        """
        Record a push.

        Args:
            report_type: type of the report pushed
            date: date string (YYYY-MM-DD), defaults to today

        Returns:
            True when recorded.
        """
        pass
def convert_crawl_results_to_news_data(
    results: Dict[str, Dict],
    id_to_name: Dict[str, str],
    failed_ids: List[str],
    crawl_time: str,
    crawl_date: str,
) -> NewsData:
    """
    Convert raw crawler output into a NewsData object.

    Args:
        results: crawler output, {source_id: {title: {ranks: [], url: "", mobileUrl: ""}}}
        id_to_name: mapping of source id -> display name
        failed_ids: source ids whose crawl failed
        crawl_time: crawl time (HH:MM)
        crawl_date: crawl date (YYYY-MM-DD)

    Returns:
        A NewsData object
    """
    items = {}
    for source_id, titles_data in results.items():
        display_name = id_to_name.get(source_id, source_id)
        converted = []
        for title, payload in titles_data.items():
            if isinstance(payload, dict):
                ranks = payload.get("ranks", [])
                url = payload.get("url", "")
                mobile_url = payload.get("mobileUrl", "")
            else:
                # Legacy format: the payload is the rank list itself
                ranks = payload if isinstance(payload, list) else []
                url = ""
                mobile_url = ""
            converted.append(
                NewsItem(
                    title=title,
                    source_id=source_id,
                    source_name=display_name,
                    rank=ranks[0] if ranks else 99,  # 99 when no rank is known
                    url=url,
                    mobile_url=mobile_url,
                    crawl_time=crawl_time,
                    ranks=ranks,
                    first_time=crawl_time,
                    last_time=crawl_time,
                    count=1,
                )
            )
        items[source_id] = converted
    return NewsData(
        date=crawl_date,
        crawl_time=crawl_time,
        items=items,
        id_to_name=id_to_name,
        failed_ids=failed_ids,
    )
def convert_news_data_to_results(data: NewsData) -> tuple:
    """
    Convert a NewsData object back to the legacy results format
    (kept for compatibility with existing callers).

    Args:
        data: a NewsData object

    Returns:
        A (results, id_to_name, title_info) tuple.
    """
    results = {}
    title_info = {}
    for source_id, news_list in data.items.items():
        source_results = {}
        source_info = {}
        for item in news_list:
            link_fields = {
                "ranks": item.ranks,
                "url": item.url,
                "mobileUrl": item.mobile_url,
            }
            source_results[item.title] = dict(link_fields)
            source_info[item.title] = {
                "first_time": item.first_time,
                "last_time": item.last_time,
                "count": item.count,
                **link_fields,
            }
        results[source_id] = source_results
        title_info[source_id] = source_info
    return results, data.id_to_name, title_info
+869
View File
@@ -0,0 +1,869 @@
# coding=utf-8
"""
本地存储后端 - SQLite + TXT/HTML
使用 SQLite 作为主存储,支持可选的 TXT 快照和 HTML 报告
"""
import sqlite3
import os
import shutil
import pytz
import re
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any
from trendradar.storage.base import StorageBackend, NewsItem, NewsData
from trendradar.utils.time import (
get_configured_time,
format_date_folder,
format_time_filename,
)
class LocalStorageBackend(StorageBackend):
    """
    Local storage backend (SQLite + optional TXT/HTML files).

    Uses one SQLite database per date folder and supports:
    - per-date SQLite database files
    - optional TXT snapshots (debug aid)
    - HTML report generation
    """
    def __init__(
        self,
        data_dir: str = "output",
        enable_txt: bool = True,
        enable_html: bool = True,
        timezone: str = "Asia/Shanghai",
    ):
        """
        Initialize the local storage backend.

        Args:
            data_dir: root directory for all output data
            enable_txt: whether TXT snapshots are written
            enable_html: whether HTML reports are written
            timezone: timezone name (default Asia/Shanghai)
        """
        self.data_dir = Path(data_dir)
        self.enable_txt = enable_txt
        self.enable_html = enable_html
        self.timezone = timezone
        # Cache of open connections keyed by database file path
        self._db_connections: Dict[str, sqlite3.Connection] = {}
    @property
    def backend_name(self) -> str:
        # Identifier used in logs/config to refer to this backend
        return "local"
    @property
    def supports_txt(self) -> bool:
        # TXT snapshots are only available when enabled in config
        return self.enable_txt
    def _get_configured_time(self) -> datetime:
        """Return the current time in the configured timezone."""
        return get_configured_time(self.timezone)
    def _format_date_folder(self, date: Optional[str] = None) -> str:
        """Format the date folder name (ISO: YYYY-MM-DD); defaults to today."""
        return format_date_folder(date, self.timezone)
    def _format_time_filename(self) -> str:
        """Format the time-based filename stem (HH-MM)."""
        return format_time_filename(self.timezone)
    def _get_db_path(self, date: Optional[str] = None) -> Path:
        """Return (and create the parent folder for) the per-date SQLite path."""
        date_folder = self._format_date_folder(date)
        db_dir = self.data_dir / date_folder
        db_dir.mkdir(parents=True, exist_ok=True)  # ensure output/<date>/ exists
        return db_dir / "news.db"
    def _get_connection(self, date: Optional[str] = None) -> sqlite3.Connection:
        """Return a cached sqlite3 connection for the date, creating tables on first open."""
        db_path = str(self._get_db_path(date))
        if db_path not in self._db_connections:
            conn = sqlite3.connect(db_path)
            conn.row_factory = sqlite3.Row  # allow column access by name
            self._init_tables(conn)
            self._db_connections[db_path] = conn
        return self._db_connections[db_path]
    def _get_schema_path(self) -> Path:
        """Return the path of schema.sql shipped next to this module."""
        return Path(__file__).parent / "schema.sql"
    def _init_tables(self, conn: sqlite3.Connection) -> None:
        """Create the database tables from schema.sql (idempotent).

        Raises:
            FileNotFoundError: when schema.sql is missing from the package.
        """
        schema_path = self._get_schema_path()
        if schema_path.exists():
            with open(schema_path, "r", encoding="utf-8") as f:
                schema_sql = f.read()
            conn.executescript(schema_sql)
        else:
            raise FileNotFoundError(f"Schema file not found: {schema_path}")
        conn.commit()
def save_news_data(self, data: NewsData) -> bool:
"""
保存新闻数据到 SQLite(以 URL 为唯一标识,支持标题更新检测)
Args:
data: 新闻数据
Returns:
是否保存成功
"""
try:
conn = self._get_connection(data.date)
cursor = conn.cursor()
# 获取配置时区的当前时间
now_str = self._get_configured_time().strftime("%Y-%m-%d %H:%M:%S")
# 首先同步平台信息到 platforms 表
for source_id, source_name in data.id_to_name.items():
cursor.execute("""
INSERT INTO platforms (id, name, updated_at)
VALUES (?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
name = excluded.name,
updated_at = excluded.updated_at
""", (source_id, source_name, now_str))
# 统计计数器
new_count = 0
updated_count = 0
title_changed_count = 0
success_sources = []
for source_id, news_list in data.items.items():
success_sources.append(source_id)
for item in news_list:
try:
# 检查是否已存在(通过 URL + platform_id
if item.url:
cursor.execute("""
SELECT id, title FROM news_items
WHERE url = ? AND platform_id = ?
""", (item.url, source_id))
existing = cursor.fetchone()
if existing:
# 已存在,更新记录
existing_id, existing_title = existing
# 检查标题是否变化
if existing_title != item.title:
# 记录标题变更
cursor.execute("""
INSERT INTO title_changes
(news_item_id, old_title, new_title, changed_at)
VALUES (?, ?, ?, ?)
""", (existing_id, existing_title, item.title, now_str))
title_changed_count += 1
# 记录排名历史
cursor.execute("""
INSERT INTO rank_history
(news_item_id, rank, crawl_time, created_at)
VALUES (?, ?, ?, ?)
""", (existing_id, item.rank, data.crawl_time, now_str))
# 更新现有记录
cursor.execute("""
UPDATE news_items SET
title = ?,
rank = ?,
mobile_url = ?,
last_crawl_time = ?,
crawl_count = crawl_count + 1,
updated_at = ?
WHERE id = ?
""", (item.title, item.rank, item.mobile_url,
data.crawl_time, now_str, existing_id))
updated_count += 1
else:
# 不存在,插入新记录
cursor.execute("""
INSERT INTO news_items
(title, platform_id, rank, url, mobile_url,
first_crawl_time, last_crawl_time, crawl_count,
created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
""", (item.title, source_id, item.rank, item.url,
item.mobile_url, data.crawl_time, data.crawl_time,
now_str, now_str))
new_id = cursor.lastrowid
# 记录初始排名
cursor.execute("""
INSERT INTO rank_history
(news_item_id, rank, crawl_time, created_at)
VALUES (?, ?, ?, ?)
""", (new_id, item.rank, data.crawl_time, now_str))
new_count += 1
else:
# URL 为空的情况,直接插入(不做去重)
cursor.execute("""
INSERT INTO news_items
(title, platform_id, rank, url, mobile_url,
first_crawl_time, last_crawl_time, crawl_count,
created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
""", (item.title, source_id, item.rank, item.url,
item.mobile_url, data.crawl_time, data.crawl_time,
now_str, now_str))
new_id = cursor.lastrowid
# 记录初始排名
cursor.execute("""
INSERT INTO rank_history
(news_item_id, rank, crawl_time, created_at)
VALUES (?, ?, ?, ?)
""", (new_id, item.rank, data.crawl_time, now_str))
new_count += 1
except sqlite3.Error as e:
print(f"保存新闻条目失败 [{item.title[:30]}...]: {e}")
total_items = new_count + updated_count
# 记录抓取信息
cursor.execute("""
INSERT OR REPLACE INTO crawl_records
(crawl_time, total_items, created_at)
VALUES (?, ?, ?)
""", (data.crawl_time, total_items, now_str))
# 获取刚插入的 crawl_record 的 ID
cursor.execute("""
SELECT id FROM crawl_records WHERE crawl_time = ?
""", (data.crawl_time,))
record_row = cursor.fetchone()
if record_row:
crawl_record_id = record_row[0]
# 记录成功的来源
for source_id in success_sources:
cursor.execute("""
INSERT OR REPLACE INTO crawl_source_status
(crawl_record_id, platform_id, status)
VALUES (?, ?, 'success')
""", (crawl_record_id, source_id))
# 记录失败的来源
for failed_id in data.failed_ids:
# 确保失败的平台也在 platforms 表中
cursor.execute("""
INSERT OR IGNORE INTO platforms (id, name, updated_at)
VALUES (?, ?, ?)
""", (failed_id, failed_id, now_str))
cursor.execute("""
INSERT OR REPLACE INTO crawl_source_status
(crawl_record_id, platform_id, status)
VALUES (?, ?, 'failed')
""", (crawl_record_id, failed_id))
conn.commit()
# 输出详细的存储统计日志
log_parts = [f"[本地存储] 处理完成:新增 {new_count}"]
if updated_count > 0:
log_parts.append(f"更新 {updated_count}")
if title_changed_count > 0:
log_parts.append(f"标题变更 {title_changed_count}")
print("".join(log_parts))
return True
except Exception as e:
print(f"[本地存储] 保存失败: {e}")
return False
    def get_today_all_data(self, date: Optional[str] = None) -> Optional[NewsData]:
        """
        Load every news item stored for the given date (merged view).

        Args:
            date: date string, defaults to today

        Returns:
            The merged news data, or None when nothing is stored / on error.
        """
        try:
            db_path = self._get_db_path(date)
            if not db_path.exists():
                return None
            conn = self._get_connection(date)
            cursor = conn.cursor()
            # All news rows with platform display names; the row id is needed
            # to attach rank history below
            cursor.execute("""
                SELECT n.id, n.title, n.platform_id, p.name as platform_name,
                       n.rank, n.url, n.mobile_url,
                       n.first_crawl_time, n.last_crawl_time, n.crawl_count
                FROM news_items n
                LEFT JOIN platforms p ON n.platform_id = p.id
                ORDER BY n.platform_id, n.last_crawl_time
            """)
            rows = cursor.fetchall()
            if not rows:
                return None
            # Collect all news_item ids
            news_ids = [row[0] for row in rows]
            # Batch-load rank history in one query (de-duplicated, in crawl order)
            rank_history_map: Dict[int, List[int]] = {}
            if news_ids:
                placeholders = ",".join("?" * len(news_ids))
                cursor.execute(f"""
                    SELECT news_item_id, rank FROM rank_history
                    WHERE news_item_id IN ({placeholders})
                    ORDER BY news_item_id, crawl_time
                """, news_ids)
                for rh_row in cursor.fetchall():
                    news_id, rank = rh_row[0], rh_row[1]
                    if news_id not in rank_history_map:
                        rank_history_map[news_id] = []
                    if rank not in rank_history_map[news_id]:
                        rank_history_map[news_id].append(rank)
            # Group rows by platform_id
            items: Dict[str, List[NewsItem]] = {}
            id_to_name: Dict[str, str] = {}
            crawl_date = self._format_date_folder(date)
            for row in rows:
                news_id = row[0]
                platform_id = row[2]
                title = row[1]
                platform_name = row[3] or platform_id
                id_to_name[platform_id] = platform_name
                if platform_id not in items:
                    items[platform_id] = []
                # Fall back to the current rank when no history exists
                ranks = rank_history_map.get(news_id, [row[4]])
                items[platform_id].append(NewsItem(
                    title=title,
                    source_id=platform_id,
                    source_name=platform_name,
                    rank=row[4],
                    url=row[5] or "",
                    mobile_url=row[6] or "",
                    crawl_time=row[8],  # last_crawl_time
                    ranks=ranks,
                    first_time=row[7],  # first_crawl_time
                    last_time=row[8],  # last_crawl_time
                    count=row[9],  # crawl_count
                ))
            final_items = items
            # Sources that failed in any crawl recorded today
            cursor.execute("""
                SELECT DISTINCT css.platform_id
                FROM crawl_source_status css
                JOIN crawl_records cr ON css.crawl_record_id = cr.id
                WHERE css.status = 'failed'
            """)
            failed_ids = [row[0] for row in cursor.fetchall()]
            # Use the most recent crawl time recorded today
            cursor.execute("""
                SELECT crawl_time FROM crawl_records
                ORDER BY crawl_time DESC
                LIMIT 1
            """)
            time_row = cursor.fetchone()
            crawl_time = time_row[0] if time_row else self._format_time_filename()
            return NewsData(
                date=crawl_date,
                crawl_time=crawl_time,
                items=final_items,
                id_to_name=id_to_name,
                failed_ids=failed_ids,
            )
        except Exception as e:
            print(f"[本地存储] 读取数据失败: {e}")
            return None
    def get_latest_crawl_data(self, date: Optional[str] = None) -> Optional[NewsData]:
        """
        Load only the most recent crawl's data for the given date.

        Args:
            date: date string, defaults to today

        Returns:
            The latest crawl's news data, or None when absent / on error.
        """
        try:
            db_path = self._get_db_path(date)
            if not db_path.exists():
                return None
            conn = self._get_connection(date)
            cursor = conn.cursor()
            # Find the most recent crawl time
            cursor.execute("""
                SELECT crawl_time FROM crawl_records
                ORDER BY crawl_time DESC
                LIMIT 1
            """)
            time_row = cursor.fetchone()
            if not time_row:
                return None
            latest_time = time_row[0]
            # Items whose last sighting was exactly that crawl (id needed for
            # the rank-history lookup below)
            cursor.execute("""
                SELECT n.id, n.title, n.platform_id, p.name as platform_name,
                       n.rank, n.url, n.mobile_url,
                       n.first_crawl_time, n.last_crawl_time, n.crawl_count
                FROM news_items n
                LEFT JOIN platforms p ON n.platform_id = p.id
                WHERE n.last_crawl_time = ?
            """, (latest_time,))
            rows = cursor.fetchall()
            if not rows:
                return None
            # Collect all news_item ids
            news_ids = [row[0] for row in rows]
            # Batch-load rank history in one query (de-duplicated, in crawl order)
            rank_history_map: Dict[int, List[int]] = {}
            if news_ids:
                placeholders = ",".join("?" * len(news_ids))
                cursor.execute(f"""
                    SELECT news_item_id, rank FROM rank_history
                    WHERE news_item_id IN ({placeholders})
                    ORDER BY news_item_id, crawl_time
                """, news_ids)
                for rh_row in cursor.fetchall():
                    news_id, rank = rh_row[0], rh_row[1]
                    if news_id not in rank_history_map:
                        rank_history_map[news_id] = []
                    if rank not in rank_history_map[news_id]:
                        rank_history_map[news_id].append(rank)
            items: Dict[str, List[NewsItem]] = {}
            id_to_name: Dict[str, str] = {}
            crawl_date = self._format_date_folder(date)
            for row in rows:
                news_id = row[0]
                platform_id = row[2]
                platform_name = row[3] or platform_id
                id_to_name[platform_id] = platform_name
                if platform_id not in items:
                    items[platform_id] = []
                # Fall back to the current rank when no history exists
                ranks = rank_history_map.get(news_id, [row[4]])
                items[platform_id].append(NewsItem(
                    title=row[1],
                    source_id=platform_id,
                    source_name=platform_name,
                    rank=row[4],
                    url=row[5] or "",
                    mobile_url=row[6] or "",
                    crawl_time=row[8],  # last_crawl_time
                    ranks=ranks,
                    first_time=row[7],  # first_crawl_time
                    last_time=row[8],  # last_crawl_time
                    count=row[9],  # crawl_count
                ))
            # Sources that failed in that specific crawl
            cursor.execute("""
                SELECT css.platform_id
                FROM crawl_source_status css
                JOIN crawl_records cr ON css.crawl_record_id = cr.id
                WHERE cr.crawl_time = ? AND css.status = 'failed'
            """, (latest_time,))
            failed_ids = [row[0] for row in cursor.fetchall()]
            return NewsData(
                date=crawl_date,
                crawl_time=latest_time,
                items=items,
                id_to_name=id_to_name,
                failed_ids=failed_ids,
            )
        except Exception as e:
            print(f"[本地存储] 获取最新数据失败: {e}")
            return None
def detect_new_titles(self, current_data: NewsData) -> Dict[str, Dict]:
"""
检测新增的标题
Args:
current_data: 当前抓取的数据
Returns:
新增的标题数据 {source_id: {title: NewsItem}}
"""
try:
# 获取历史数据
historical_data = self.get_today_all_data(current_data.date)
if not historical_data:
# 没有历史数据,所有都是新的
new_titles = {}
for source_id, news_list in current_data.items.items():
new_titles[source_id] = {item.title: item for item in news_list}
return new_titles
# 收集历史标题
historical_titles: Dict[str, set] = {}
for source_id, news_list in historical_data.items.items():
historical_titles[source_id] = {item.title for item in news_list}
# 检测新增
new_titles = {}
for source_id, news_list in current_data.items.items():
hist_set = historical_titles.get(source_id, set())
for item in news_list:
if item.title not in hist_set:
if source_id not in new_titles:
new_titles[source_id] = {}
new_titles[source_id][item.title] = item
return new_titles
except Exception as e:
print(f"[本地存储] 检测新标题失败: {e}")
return {}
def save_txt_snapshot(self, data: NewsData) -> Optional[str]:
"""
保存 TXT 快照
Args:
data: 新闻数据
Returns:
保存的文件路径
"""
if not self.enable_txt:
return None
try:
date_folder = self._format_date_folder(data.date)
txt_dir = self.data_dir / date_folder / "txt"
txt_dir.mkdir(parents=True, exist_ok=True)
file_path = txt_dir / f"{data.crawl_time}.txt"
with open(file_path, "w", encoding="utf-8") as f:
for source_id, news_list in data.items.items():
source_name = data.id_to_name.get(source_id, source_id)
# 写入来源标题
if source_name and source_name != source_id:
f.write(f"{source_id} | {source_name}\n")
else:
f.write(f"{source_id}\n")
# 按排名排序
sorted_news = sorted(news_list, key=lambda x: x.rank)
for item in sorted_news:
line = f"{item.rank}. {item.title}"
if item.url:
line += f" [URL:{item.url}]"
if item.mobile_url:
line += f" [MOBILE:{item.mobile_url}]"
f.write(line + "\n")
f.write("\n")
# 写入失败的来源
if data.failed_ids:
f.write("==== 以下ID请求失败 ====\n")
for failed_id in data.failed_ids:
f.write(f"{failed_id}\n")
print(f"[本地存储] TXT 快照已保存: {file_path}")
return str(file_path)
except Exception as e:
print(f"[本地存储] 保存 TXT 快照失败: {e}")
return None
def save_html_report(self, html_content: str, filename: str, is_summary: bool = False) -> Optional[str]:
"""
保存 HTML 报告
Args:
html_content: HTML 内容
filename: 文件名
is_summary: 是否为汇总报告
Returns:
保存的文件路径
"""
if not self.enable_html:
return None
try:
date_folder = self._format_date_folder()
html_dir = self.data_dir / date_folder / "html"
html_dir.mkdir(parents=True, exist_ok=True)
file_path = html_dir / filename
with open(file_path, "w", encoding="utf-8") as f:
f.write(html_content)
print(f"[本地存储] HTML 报告已保存: {file_path}")
return str(file_path)
except Exception as e:
print(f"[本地存储] 保存 HTML 报告失败: {e}")
return None
def is_first_crawl_today(self, date: Optional[str] = None) -> bool:
"""
检查是否是当天第一次抓取
Args:
date: 日期字符串,默认为今天
Returns:
是否是第一次抓取
"""
try:
db_path = self._get_db_path(date)
if not db_path.exists():
return True
conn = self._get_connection(date)
cursor = conn.cursor()
cursor.execute("""
SELECT COUNT(*) as count FROM crawl_records
""")
row = cursor.fetchone()
count = row[0] if row else 0
# 如果只有一条或没有记录,视为第一次抓取
return count <= 1
except Exception as e:
print(f"[本地存储] 检查首次抓取失败: {e}")
return True
def get_crawl_times(self, date: Optional[str] = None) -> List[str]:
"""
获取指定日期的所有抓取时间列表
Args:
date: 日期字符串,默认为今天
Returns:
抓取时间列表(按时间排序)
"""
try:
db_path = self._get_db_path(date)
if not db_path.exists():
return []
conn = self._get_connection(date)
cursor = conn.cursor()
cursor.execute("""
SELECT crawl_time FROM crawl_records
ORDER BY crawl_time
""")
rows = cursor.fetchall()
return [row[0] for row in rows]
except Exception as e:
print(f"[本地存储] 获取抓取时间列表失败: {e}")
return []
def cleanup(self) -> None:
"""清理资源(关闭数据库连接)"""
for db_path, conn in self._db_connections.items():
try:
conn.close()
print(f"[本地存储] 关闭数据库连接: {db_path}")
except Exception as e:
print(f"[本地存储] 关闭连接失败 {db_path}: {e}")
self._db_connections.clear()
def cleanup_old_data(self, retention_days: int) -> int:
"""
清理过期数据
Args:
retention_days: 保留天数(0 表示不清理)
Returns:
删除的日期目录数量
"""
if retention_days <= 0:
return 0
deleted_count = 0
cutoff_date = self._get_configured_time() - timedelta(days=retention_days)
try:
if not self.data_dir.exists():
return 0
for date_folder in self.data_dir.iterdir():
if not date_folder.is_dir() or date_folder.name.startswith('.'):
continue
# 解析日期文件夹名(支持两种格式)
folder_date = None
try:
# ISO 格式: YYYY-MM-DD
date_match = re.match(r'(\d{4})-(\d{2})-(\d{2})', date_folder.name)
if date_match:
folder_date = datetime(
int(date_match.group(1)),
int(date_match.group(2)),
int(date_match.group(3)),
tzinfo=pytz.timezone("Asia/Shanghai")
)
else:
# 旧中文格式: YYYY年MM月DD日
date_match = re.match(r'(\d{4})年(\d{2})月(\d{2})日', date_folder.name)
if date_match:
folder_date = datetime(
int(date_match.group(1)),
int(date_match.group(2)),
int(date_match.group(3)),
tzinfo=pytz.timezone("Asia/Shanghai")
)
except Exception:
continue
if folder_date and folder_date < cutoff_date:
# 先关闭该日期的数据库连接
db_path = str(self._get_db_path(date_folder.name))
if db_path in self._db_connections:
try:
self._db_connections[db_path].close()
del self._db_connections[db_path]
except Exception:
pass
# 删除整个日期目录
try:
shutil.rmtree(date_folder)
deleted_count += 1
print(f"[本地存储] 清理过期数据: {date_folder.name}")
except Exception as e:
print(f"[本地存储] 删除目录失败 {date_folder.name}: {e}")
if deleted_count > 0:
print(f"[本地存储] 共清理 {deleted_count} 个过期日期目录")
return deleted_count
except Exception as e:
print(f"[本地存储] 清理过期数据失败: {e}")
return deleted_count
def has_pushed_today(self, date: Optional[str] = None) -> bool:
"""
检查指定日期是否已推送过
Args:
date: 日期字符串(YYYY-MM-DD),默认为今天
Returns:
是否已推送
"""
try:
conn = self._get_connection(date)
cursor = conn.cursor()
target_date = self._format_date_folder(date)
cursor.execute("""
SELECT pushed FROM push_records WHERE date = ?
""", (target_date,))
row = cursor.fetchone()
if row:
return bool(row[0])
return False
except Exception as e:
print(f"[本地存储] 检查推送记录失败: {e}")
return False
def record_push(self, report_type: str, date: Optional[str] = None) -> bool:
"""
记录推送
Args:
report_type: 报告类型
date: 日期字符串(YYYY-MM-DD),默认为今天
Returns:
是否记录成功
"""
try:
conn = self._get_connection(date)
cursor = conn.cursor()
target_date = self._format_date_folder(date)
now_str = self._get_configured_time().strftime("%Y-%m-%d %H:%M:%S")
cursor.execute("""
INSERT INTO push_records (date, pushed, push_time, report_type, created_at)
VALUES (?, 1, ?, ?, ?)
ON CONFLICT(date) DO UPDATE SET
pushed = 1,
push_time = excluded.push_time,
report_type = excluded.report_type
""", (target_date, now_str, report_type, now_str))
conn.commit()
print(f"[本地存储] 推送记录已保存: {report_type} at {now_str}")
return True
except Exception as e:
print(f"[本地存储] 记录推送失败: {e}")
return False
    def __del__(self):
        """Destructor: best-effort close of any cached DB connections."""
        self.cleanup()
+316
View File
@@ -0,0 +1,316 @@
# coding=utf-8
"""
存储管理器 - 统一管理存储后端
根据环境和配置自动选择合适的存储后端
"""
import os
from typing import Optional
from trendradar.storage.base import StorageBackend, NewsData
# Module-level singleton, managed by get_storage_manager()
_storage_manager: Optional["StorageManager"] = None
class StorageManager:
    """
    Storage manager.

    Responsibilities:
    - detect the runtime environment (GitHub Actions / Docker / local)
    - pick a storage backend from config (local / remote / auto)
    - expose a unified storage API
    - support pulling data from remote storage to local
    """
    def __init__(
        self,
        backend_type: str = "auto",
        data_dir: str = "output",
        enable_txt: bool = True,
        enable_html: bool = True,
        remote_config: Optional[dict] = None,
        local_retention_days: int = 0,
        remote_retention_days: int = 0,
        pull_enabled: bool = False,
        pull_days: int = 0,
        timezone: str = "Asia/Shanghai",
    ):
        """
        Initialize the storage manager.

        Args:
            backend_type: backend type (local / remote / auto)
            data_dir: local data directory
            enable_txt: whether TXT snapshots are enabled
            enable_html: whether HTML reports are enabled
            remote_config: remote-storage config (endpoint_url, bucket_name, access_key_id, ...)
            local_retention_days: days to keep local data (0 = unlimited)
            remote_retention_days: days to keep remote data (0 = unlimited)
            pull_enabled: whether to auto-pull on startup
            pull_days: pull the most recent N days of data
            timezone: timezone name (default Asia/Shanghai)
        """
        self.backend_type = backend_type
        self.data_dir = data_dir
        self.enable_txt = enable_txt
        self.enable_html = enable_html
        self.remote_config = remote_config or {}
        self.local_retention_days = local_retention_days
        self.remote_retention_days = remote_retention_days
        self.pull_enabled = pull_enabled
        self.pull_days = pull_days
        self.timezone = timezone
        # Backends are created lazily (see get_backend / pull_from_remote)
        self._backend: Optional[StorageBackend] = None
        self._remote_backend: Optional[StorageBackend] = None
    @staticmethod
    def is_github_actions() -> bool:
        """Detect whether we are running inside GitHub Actions."""
        return os.environ.get("GITHUB_ACTIONS") == "true"
    @staticmethod
    def is_docker() -> bool:
        """Detect whether we are running inside a Docker container."""
        # Heuristic 1: the /.dockerenv marker file
        if os.path.exists("/.dockerenv"):
            return True
        # Heuristic 2: cgroup of PID 1 (Linux only)
        try:
            with open("/proc/1/cgroup", "r") as f:
                return "docker" in f.read()
        except (FileNotFoundError, PermissionError):
            pass
        # Heuristic 3: explicit environment variable
        return os.environ.get("DOCKER_CONTAINER") == "true"
def _resolve_backend_type(self) -> str:
"""解析实际使用的后端类型"""
if self.backend_type == "auto":
if self.is_github_actions():
# GitHub Actions 环境,检查是否配置了远程存储
if self._has_remote_config():
return "remote"
else:
print("[存储管理器] GitHub Actions 环境但未配置远程存储,使用本地存储")
return "local"
else:
return "local"
return self.backend_type
def _has_remote_config(self) -> bool:
"""检查是否有有效的远程存储配置"""
# 检查配置或环境变量
bucket_name = self.remote_config.get("bucket_name") or os.environ.get("S3_BUCKET_NAME")
access_key = self.remote_config.get("access_key_id") or os.environ.get("S3_ACCESS_KEY_ID")
secret_key = self.remote_config.get("secret_access_key") or os.environ.get("S3_SECRET_ACCESS_KEY")
endpoint = self.remote_config.get("endpoint_url") or os.environ.get("S3_ENDPOINT_URL")
# 调试日志
has_config = bool(bucket_name and access_key and secret_key and endpoint)
if not has_config:
print(f"[存储管理器] 远程存储配置检查失败:")
print(f" - bucket_name: {'已配置' if bucket_name else '未配置'}")
print(f" - access_key_id: {'已配置' if access_key else '未配置'}")
print(f" - secret_access_key: {'已配置' if secret_key else '未配置'}")
print(f" - endpoint_url: {'已配置' if endpoint else '未配置'}")
return has_config
    def _create_remote_backend(self) -> Optional[StorageBackend]:
        """Create the S3-compatible remote backend; return None when unavailable."""
        try:
            from trendradar.storage.remote import RemoteStorageBackend
            # Config values win over environment variables
            return RemoteStorageBackend(
                bucket_name=self.remote_config.get("bucket_name") or os.environ.get("S3_BUCKET_NAME", ""),
                access_key_id=self.remote_config.get("access_key_id") or os.environ.get("S3_ACCESS_KEY_ID", ""),
                secret_access_key=self.remote_config.get("secret_access_key") or os.environ.get("S3_SECRET_ACCESS_KEY", ""),
                endpoint_url=self.remote_config.get("endpoint_url") or os.environ.get("S3_ENDPOINT_URL", ""),
                region=self.remote_config.get("region") or os.environ.get("S3_REGION", ""),
                enable_txt=self.enable_txt,
                enable_html=self.enable_html,
                timezone=self.timezone,
            )
        except ImportError as e:
            # boto3 (or the remote module) is not installed
            print(f"[存储管理器] 远程后端导入失败: {e}")
            print("[存储管理器] 请确保已安装 boto3: pip install boto3")
            return None
        except Exception as e:
            print(f"[存储管理器] 远程后端初始化失败: {e}")
            return None
    def get_backend(self) -> StorageBackend:
        """Return the active backend, creating it on first call (remote falls back to local)."""
        if self._backend is None:
            resolved_type = self._resolve_backend_type()
            if resolved_type == "remote":
                self._backend = self._create_remote_backend()
                if self._backend:
                    print(f"[存储管理器] 使用远程存储后端")
                else:
                    # Remote creation failed: degrade gracefully to local
                    print("[存储管理器] 回退到本地存储")
                    resolved_type = "local"
            if resolved_type == "local" or self._backend is None:
                from trendradar.storage.local import LocalStorageBackend
                self._backend = LocalStorageBackend(
                    data_dir=self.data_dir,
                    enable_txt=self.enable_txt,
                    enable_html=self.enable_html,
                    timezone=self.timezone,
                )
                print(f"[存储管理器] 使用本地存储后端 (数据目录: {self.data_dir})")
        return self._backend
def pull_from_remote(self) -> int:
"""
从远程拉取数据到本地
Returns:
成功拉取的文件数量
"""
if not self.pull_enabled or self.pull_days <= 0:
return 0
if not self._has_remote_config():
print("[存储管理器] 未配置远程存储,无法拉取")
return 0
# 创建远程后端(如果还没有)
if self._remote_backend is None:
self._remote_backend = self._create_remote_backend()
if self._remote_backend is None:
print("[存储管理器] 无法创建远程后端,拉取失败")
return 0
# 调用拉取方法
return self._remote_backend.pull_recent_days(self.pull_days, self.data_dir)
    def save_news_data(self, data: NewsData) -> bool:
        """Delegate: persist one crawl of news data."""
        return self.get_backend().save_news_data(data)
    def get_today_all_data(self, date: Optional[str] = None) -> Optional[NewsData]:
        """Delegate: fetch the merged data for a date."""
        return self.get_backend().get_today_all_data(date)
    def get_latest_crawl_data(self, date: Optional[str] = None) -> Optional[NewsData]:
        """Delegate: fetch the most recent crawl's data."""
        return self.get_backend().get_latest_crawl_data(date)
    def detect_new_titles(self, current_data: NewsData) -> dict:
        """Delegate: find titles not seen earlier today."""
        return self.get_backend().detect_new_titles(current_data)
    def save_txt_snapshot(self, data: NewsData) -> Optional[str]:
        """Delegate: write a TXT snapshot (when supported)."""
        return self.get_backend().save_txt_snapshot(data)
    def save_html_report(self, html_content: str, filename: str, is_summary: bool = False) -> Optional[str]:
        """Delegate: write an HTML report."""
        return self.get_backend().save_html_report(html_content, filename, is_summary)
    def is_first_crawl_today(self, date: Optional[str] = None) -> bool:
        """Delegate: whether this is the first crawl of the day."""
        return self.get_backend().is_first_crawl_today(date)
def cleanup(self) -> None:
"""清理资源"""
if self._backend:
self._backend.cleanup()
if self._remote_backend:
self._remote_backend.cleanup()
def cleanup_old_data(self) -> int:
"""
清理过期数据
Returns:
删除的日期目录数量
"""
total_deleted = 0
# 清理本地数据
if self.local_retention_days > 0:
total_deleted += self.get_backend().cleanup_old_data(self.local_retention_days)
# 清理远程数据(如果配置了)
if self.remote_retention_days > 0 and self._has_remote_config():
if self._remote_backend is None:
self._remote_backend = self._create_remote_backend()
if self._remote_backend:
total_deleted += self._remote_backend.cleanup_old_data(self.remote_retention_days)
return total_deleted
    @property
    def backend_name(self) -> str:
        """Name of the backend actually in use (resolves "auto")."""
        return self.get_backend().backend_name
    @property
    def supports_txt(self) -> bool:
        """Whether the active backend can write TXT snapshots."""
        return self.get_backend().supports_txt
def get_storage_manager(
    backend_type: str = "auto",
    data_dir: str = "output",
    enable_txt: bool = True,
    enable_html: bool = True,
    remote_config: Optional[dict] = None,
    local_retention_days: int = 0,
    remote_retention_days: int = 0,
    pull_enabled: bool = False,
    pull_days: int = 0,
    timezone: str = "Asia/Shanghai",
    force_new: bool = False,
) -> StorageManager:
    """
    Return the process-wide StorageManager singleton.

    Note: unless ``force_new`` is True, the arguments are only honoured on
    the first call — later calls return the cached instance unchanged.

    Args:
        backend_type: backend type (local / remote / auto)
        data_dir: local data directory
        enable_txt: whether TXT snapshots are enabled
        enable_html: whether HTML reports are enabled
        remote_config: remote-storage configuration
        local_retention_days: days to keep local data (0 = unlimited)
        remote_retention_days: days to keep remote data (0 = unlimited)
        pull_enabled: whether to auto-pull on startup
        pull_days: pull the most recent N days of data
        timezone: timezone name (default Asia/Shanghai)
        force_new: create a fresh instance, replacing the cached one

    Returns:
        The shared StorageManager instance.
    """
    global _storage_manager
    if force_new or _storage_manager is None:
        _storage_manager = StorageManager(
            backend_type=backend_type,
            data_dir=data_dir,
            enable_txt=enable_txt,
            enable_html=enable_html,
            remote_config=remote_config,
            local_retention_days=local_retention_days,
            remote_retention_days=remote_retention_days,
            pull_enabled=pull_enabled,
            pull_days=pull_days,
            timezone=timezone,
        )
    return _storage_manager
File diff suppressed because it is too large Load Diff
+117
View File
@@ -0,0 +1,117 @@
-- TrendRadar database schema
-- ============================================
-- Platform table
-- Key invariant: id never changes, name may change
-- ============================================
CREATE TABLE IF NOT EXISTS platforms (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    is_active INTEGER DEFAULT 1,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- ============================================
-- News items table
-- Uniquely identified by URL + platform_id, so repeated
-- crawls of the same story are deduplicated
-- ============================================
CREATE TABLE IF NOT EXISTS news_items (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    title TEXT NOT NULL,
    platform_id TEXT NOT NULL,
    rank INTEGER NOT NULL,
    url TEXT DEFAULT '',
    mobile_url TEXT DEFAULT '',
    first_crawl_time TEXT NOT NULL, -- time the item was first crawled
    last_crawl_time TEXT NOT NULL, -- time the item was most recently crawled
    crawl_count INTEGER DEFAULT 1, -- number of crawls that saw this item
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (platform_id) REFERENCES platforms(id)
);

-- ============================================
-- Title change history
-- Records how the title of a given URL changed over time
-- ============================================
CREATE TABLE IF NOT EXISTS title_changes (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    news_item_id INTEGER NOT NULL,
    old_title TEXT NOT NULL,
    new_title TEXT NOT NULL,
    changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (news_item_id) REFERENCES news_items(id)
);

-- ============================================
-- Rank history
-- Records the item's rank at each crawl
-- ============================================
CREATE TABLE IF NOT EXISTS rank_history (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    news_item_id INTEGER NOT NULL,
    rank INTEGER NOT NULL,
    crawl_time TEXT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (news_item_id) REFERENCES news_items(id)
);

-- ============================================
-- Crawl records
-- One row per crawl run: its time and item count
-- ============================================
CREATE TABLE IF NOT EXISTS crawl_records (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    crawl_time TEXT NOT NULL UNIQUE,
    total_items INTEGER DEFAULT 0,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- ============================================
-- Crawl source status
-- Per-platform success/failure status for each crawl run
-- ============================================
CREATE TABLE IF NOT EXISTS crawl_source_status (
    crawl_record_id INTEGER NOT NULL,
    platform_id TEXT NOT NULL,
    status TEXT NOT NULL CHECK(status IN ('success', 'failed')),
    PRIMARY KEY (crawl_record_id, platform_id),
    FOREIGN KEY (crawl_record_id) REFERENCES crawl_records(id),
    FOREIGN KEY (platform_id) REFERENCES platforms(id)
);

-- ============================================
-- Push records
-- Backs the push_window "once_per_day" feature
-- ============================================
CREATE TABLE IF NOT EXISTS push_records (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    date TEXT NOT NULL UNIQUE,
    pushed INTEGER DEFAULT 0,
    push_time TEXT,
    report_type TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- ============================================
-- Index definitions
-- ============================================
-- Platform lookup
CREATE INDEX IF NOT EXISTS idx_news_platform ON news_items(platform_id);
-- Time lookup (for fetching the latest data)
CREATE INDEX IF NOT EXISTS idx_news_crawl_time ON news_items(last_crawl_time);
-- Title lookup (for title search)
CREATE INDEX IF NOT EXISTS idx_news_title ON news_items(title);
-- Unique URL + platform_id index (non-empty URLs only; enforces dedup)
CREATE UNIQUE INDEX IF NOT EXISTS idx_news_url_platform
ON news_items(url, platform_id) WHERE url != '';
-- Crawl status lookup
CREATE INDEX IF NOT EXISTS idx_crawl_status_record ON crawl_source_status(crawl_record_id);
-- Rank history lookup
CREATE INDEX IF NOT EXISTS idx_rank_history_news ON rank_history(news_item_id);
+20
View File
@@ -0,0 +1,20 @@
# coding=utf-8
"""
Utility package - shared helper functions.

Re-exports the time helpers so callers can import them
directly from ``trendradar.utils``.
"""

from trendradar.utils.time import (
    get_configured_time,
    format_date_folder,
    format_time_filename,
    get_current_time_display,
    convert_time_for_display,
)

# Public API of this package.
__all__ = [
    "get_configured_time",
    "format_date_folder",
    "format_time_filename",
    "get_current_time_display",
    "convert_time_for_display",
]
+91
View File
@@ -0,0 +1,91 @@
# coding=utf-8
"""
Time utilities - unified time-handling helpers.

All helpers accept an IANA timezone name and fall back to
``DEFAULT_TIMEZONE`` when none is given.
"""

from datetime import datetime
from typing import Optional

import pytz

# Default timezone used when callers do not specify one.
DEFAULT_TIMEZONE = "Asia/Shanghai"
def get_configured_time(timezone: str = DEFAULT_TIMEZONE) -> datetime:
    """
    Return the current time in the configured timezone.

    Args:
        timezone: IANA timezone name, e.g. 'Asia/Shanghai', 'America/Los_Angeles'.

    Returns:
        Timezone-aware current datetime.
    """
    try:
        tzinfo = pytz.timezone(timezone)
    except pytz.UnknownTimeZoneError:
        # Warn and fall back to the default timezone instead of failing.
        print(f"[警告] 未知时区 '{timezone}',使用默认时区 {DEFAULT_TIMEZONE}")
        tzinfo = pytz.timezone(DEFAULT_TIMEZONE)

    return datetime.now(tzinfo)
def format_date_folder(
    date: Optional[str] = None, timezone: str = DEFAULT_TIMEZONE
) -> str:
    """
    Build a date folder name in ISO format (YYYY-MM-DD).

    Args:
        date: Explicit date string; when falsy, the current date is used.
        timezone: IANA timezone name.

    Returns:
        Date string such as '2025-12-09'.
    """
    # An explicitly supplied date wins over "today".
    if date:
        return date

    now = get_configured_time(timezone)
    return now.strftime("%Y-%m-%d")
def format_time_filename(timezone: str = DEFAULT_TIMEZONE) -> str:
    """
    Build a time string for filenames (format: HH-MM).

    Windows forbids colons in filenames, hence the hyphen separator.

    Args:
        timezone: IANA timezone name.

    Returns:
        Time string such as '15-30'.
    """
    now = get_configured_time(timezone)
    return now.strftime("%H-%M")
def get_current_time_display(timezone: str = DEFAULT_TIMEZONE) -> str:
    """
    Build the current time for display (format: HH:MM).

    Args:
        timezone: IANA timezone name.

    Returns:
        Time string such as '15:30'.
    """
    now = get_configured_time(timezone)
    return now.strftime("%H:%M")
def convert_time_for_display(time_str: str) -> str:
    """
    Convert an HH-MM string into HH:MM for display.

    Args:
        time_str: Input time string, e.g. '15-30'.

    Returns:
        Converted string, e.g. '15:30'; anything that is not a
        5-character hyphenated time is returned unchanged.
    """
    looks_like_hh_mm = bool(time_str) and len(time_str) == 5 and "-" in time_str
    return time_str.replace("-", ":") if looks_like_hh_mm else time_str