""" 数据查询工具 实现P0核心的数据查询工具。 """ from typing import Dict, List, Optional from ..services.data_service import DataService from ..utils.validators import ( validate_platforms, validate_limit, validate_keyword, validate_date_range, validate_top_n, validate_mode, validate_date_query ) from ..utils.errors import MCPError class DataQueryTools: """数据查询工具类""" def __init__(self, project_root: str = None): """ 初始化数据查询工具 Args: project_root: 项目根目录 """ self.data_service = DataService(project_root) def get_latest_news( self, platforms: Optional[List[str]] = None, limit: Optional[int] = None, include_url: bool = False ) -> Dict: """ 获取最新一批爬取的新闻数据 Args: platforms: 平台ID列表,如 ['zhihu', 'weibo'] limit: 返回条数限制,默认20 include_url: 是否包含URL链接,默认False(节省token) Returns: 新闻列表字典 Example: >>> tools = DataQueryTools() >>> result = tools.get_latest_news(platforms=['zhihu'], limit=10) >>> print(result['total']) 10 """ try: # 参数验证 platforms = validate_platforms(platforms) limit = validate_limit(limit, default=50) # 获取数据 news_list = self.data_service.get_latest_news( platforms=platforms, limit=limit, include_url=include_url ) return { "news": news_list, "total": len(news_list), "platforms": platforms, "success": True } except MCPError as e: return { "success": False, "error": e.to_dict() } except Exception as e: return { "success": False, "error": { "code": "INTERNAL_ERROR", "message": str(e) } } def search_news_by_keyword( self, keyword: str, date_range: Optional[Dict] = None, platforms: Optional[List[str]] = None, limit: Optional[int] = None ) -> Dict: """ 按关键词搜索历史新闻 Args: keyword: 搜索关键词(必需) date_range: 日期范围,格式: {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"} platforms: 平台过滤列表 limit: 返回条数限制(可选,默认返回所有) Returns: 搜索结果字典 Example (假设今天是 2025-11-17): >>> tools = DataQueryTools() >>> result = tools.search_news_by_keyword( ... keyword="人工智能", ... date_range={"start": "2025-11-08", "end": "2025-11-17"}, ... limit=50 ... ) >>> print(result['total']) """ try: # 参数验证 keyword = validate_keyword(keyword) date_range_tuple = validate_date_range(date_range) platforms = validate_platforms(platforms) if limit is not None: limit = validate_limit(limit, default=100) # 搜索数据 search_result = self.data_service.search_news_by_keyword( keyword=keyword, date_range=date_range_tuple, platforms=platforms, limit=limit ) return { **search_result, "success": True } except MCPError as e: return { "success": False, "error": e.to_dict() } except Exception as e: return { "success": False, "error": { "code": "INTERNAL_ERROR", "message": str(e) } } def get_trending_topics( self, top_n: Optional[int] = None, mode: Optional[str] = None ) -> Dict: """ 获取个人关注词的新闻出现频率统计 注意:本工具基于 config/frequency_words.txt 中的个人关注词列表进行统计, 而不是自动从新闻中提取热点话题。这是一个个人可定制的关注词列表, 用户可以根据自己的兴趣添加或删除关注词。 Args: top_n: 返回TOP N关注词,默认10 mode: 模式 - daily(当日累计), current(最新一批), incremental(增量) Returns: 关注词频率统计字典,包含每个关注词在新闻中出现的次数 Example: >>> tools = DataQueryTools() >>> result = tools.get_trending_topics(top_n=5, mode="current") >>> print(len(result['topics'])) 5 >>> # 返回的是你在 frequency_words.txt 中设置的关注词的频率统计 """ try: # 参数验证 top_n = validate_top_n(top_n, default=10) valid_modes = ["daily", "current", "incremental"] mode = validate_mode(mode, valid_modes, default="current") # 获取趋势话题 trending_result = self.data_service.get_trending_topics( top_n=top_n, mode=mode ) return { **trending_result, "success": True } except MCPError as e: return { "success": False, "error": e.to_dict() } except Exception as e: return { "success": False, "error": { "code": "INTERNAL_ERROR", "message": str(e) } } def get_news_by_date( self, date_query: Optional[str] = None, platforms: Optional[List[str]] = None, limit: Optional[int] = None, include_url: bool = False ) -> Dict: """ 按日期查询新闻,支持自然语言日期 Args: date_query: 日期查询字符串(可选,默认"今天"),支持: - 相对日期:今天、昨天、前天、3天前、yesterday、3 days ago - 星期:上周一、本周三、last monday、this friday - 绝对日期:2025-10-10、10月10日、2025年10月10日 platforms: 平台ID列表,如 ['zhihu', 'weibo'] limit: 返回条数限制,默认50 include_url: 是否包含URL链接,默认False(节省token) Returns: 新闻列表字典 Example: >>> tools = DataQueryTools() >>> # 不指定日期,默认查询今天 >>> result = tools.get_news_by_date(platforms=['zhihu'], limit=20) >>> # 指定日期 >>> result = tools.get_news_by_date( ... date_query="昨天", ... platforms=['zhihu'], ... limit=20 ... ) >>> print(result['total']) 20 """ try: # 参数验证 - 默认今天 if date_query is None: date_query = "今天" target_date = validate_date_query(date_query) platforms = validate_platforms(platforms) limit = validate_limit(limit, default=50) # 获取数据 news_list = self.data_service.get_news_by_date( target_date=target_date, platforms=platforms, limit=limit, include_url=include_url ) return { "news": news_list, "total": len(news_list), "date": target_date.strftime("%Y-%m-%d"), "date_query": date_query, "platforms": platforms, "success": True } except MCPError as e: return { "success": False, "error": e.to_dict() } except Exception as e: return { "success": False, "error": { "code": "INTERNAL_ERROR", "message": str(e) } }