TrendRadar/mcp_server/tools/data_query.py
2025-10-20 21:41:24 +08:00

285 lines
8.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
数据查询工具
实现P0核心的数据查询工具。
"""
from typing import Dict, List, Optional
from ..services.data_service import DataService
from ..utils.validators import (
validate_platforms,
validate_limit,
validate_keyword,
validate_date_range,
validate_top_n,
validate_mode,
validate_date_query
)
from ..utils.errors import MCPError
class DataQueryTools:
"""数据查询工具类"""
def __init__(self, project_root: str = None):
"""
初始化数据查询工具
Args:
project_root: 项目根目录
"""
self.data_service = DataService(project_root)
def get_latest_news(
self,
platforms: Optional[List[str]] = None,
limit: Optional[int] = None,
include_url: bool = False
) -> Dict:
"""
获取最新一批爬取的新闻数据
Args:
platforms: 平台ID列表如 ['zhihu', 'weibo']
limit: 返回条数限制默认20
include_url: 是否包含URL链接默认False节省token
Returns:
新闻列表字典
Example:
>>> tools = DataQueryTools()
>>> result = tools.get_latest_news(platforms=['zhihu'], limit=10)
>>> print(result['total'])
10
"""
try:
# 参数验证
platforms = validate_platforms(platforms)
limit = validate_limit(limit, default=50)
# 获取数据
news_list = self.data_service.get_latest_news(
platforms=platforms,
limit=limit,
include_url=include_url
)
return {
"news": news_list,
"total": len(news_list),
"platforms": platforms,
"success": True
}
except MCPError as e:
return {
"success": False,
"error": e.to_dict()
}
except Exception as e:
return {
"success": False,
"error": {
"code": "INTERNAL_ERROR",
"message": str(e)
}
}
def search_news_by_keyword(
self,
keyword: str,
date_range: Optional[Dict] = None,
platforms: Optional[List[str]] = None,
limit: Optional[int] = None
) -> Dict:
"""
按关键词搜索历史新闻
Args:
keyword: 搜索关键词(必需)
date_range: 日期范围,格式: {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}
platforms: 平台过滤列表
limit: 返回条数限制(可选,默认返回所有)
Returns:
搜索结果字典
Example:
>>> tools = DataQueryTools()
>>> result = tools.search_news_by_keyword(
... keyword="人工智能",
... date_range={"start": "2025-10-01", "end": "2025-10-11"},
... limit=50
... )
>>> print(result['total'])
"""
try:
# 参数验证
keyword = validate_keyword(keyword)
date_range_tuple = validate_date_range(date_range)
platforms = validate_platforms(platforms)
if limit is not None:
limit = validate_limit(limit, default=100)
# 搜索数据
search_result = self.data_service.search_news_by_keyword(
keyword=keyword,
date_range=date_range_tuple,
platforms=platforms,
limit=limit
)
return {
**search_result,
"success": True
}
except MCPError as e:
return {
"success": False,
"error": e.to_dict()
}
except Exception as e:
return {
"success": False,
"error": {
"code": "INTERNAL_ERROR",
"message": str(e)
}
}
def get_trending_topics(
self,
top_n: Optional[int] = None,
mode: Optional[str] = None
) -> Dict:
"""
获取个人关注词的新闻出现频率统计
注意:本工具基于 config/frequency_words.txt 中的个人关注词列表进行统计,
而不是自动从新闻中提取热点话题。这是一个个人可定制的关注词列表,
用户可以根据自己的兴趣添加或删除关注词。
Args:
top_n: 返回TOP N关注词默认10
mode: 模式 - daily(当日累计), current(最新一批), incremental(增量)
Returns:
关注词频率统计字典,包含每个关注词在新闻中出现的次数
Example:
>>> tools = DataQueryTools()
>>> result = tools.get_trending_topics(top_n=5, mode="current")
>>> print(len(result['topics']))
5
>>> # 返回的是你在 frequency_words.txt 中设置的关注词的频率统计
"""
try:
# 参数验证
top_n = validate_top_n(top_n, default=10)
valid_modes = ["daily", "current", "incremental"]
mode = validate_mode(mode, valid_modes, default="current")
# 获取趋势话题
trending_result = self.data_service.get_trending_topics(
top_n=top_n,
mode=mode
)
return {
**trending_result,
"success": True
}
except MCPError as e:
return {
"success": False,
"error": e.to_dict()
}
except Exception as e:
return {
"success": False,
"error": {
"code": "INTERNAL_ERROR",
"message": str(e)
}
}
def get_news_by_date(
self,
date_query: Optional[str] = None,
platforms: Optional[List[str]] = None,
limit: Optional[int] = None,
include_url: bool = False
) -> Dict:
"""
按日期查询新闻,支持自然语言日期
Args:
date_query: 日期查询字符串(可选,默认"今天"),支持:
- 相对日期今天、昨天、前天、3天前、yesterday、3 days ago
- 星期上周一、本周三、last monday、this friday
- 绝对日期2025-10-10、10月10日、2025年10月10日
platforms: 平台ID列表如 ['zhihu', 'weibo']
limit: 返回条数限制默认50
include_url: 是否包含URL链接默认False节省token
Returns:
新闻列表字典
Example:
>>> tools = DataQueryTools()
>>> # 不指定日期,默认查询今天
>>> result = tools.get_news_by_date(platforms=['zhihu'], limit=20)
>>> # 指定日期
>>> result = tools.get_news_by_date(
... date_query="昨天",
... platforms=['zhihu'],
... limit=20
... )
>>> print(result['total'])
20
"""
try:
# 参数验证 - 默认今天
if date_query is None:
date_query = "今天"
target_date = validate_date_query(date_query)
platforms = validate_platforms(platforms)
limit = validate_limit(limit, default=50)
# 获取数据
news_list = self.data_service.get_news_by_date(
target_date=target_date,
platforms=platforms,
limit=limit,
include_url=include_url
)
return {
"news": news_list,
"total": len(news_list),
"date": target_date.strftime("%Y-%m-%d"),
"date_query": date_query,
"platforms": platforms,
"success": True
}
except MCPError as e:
return {
"success": False,
"error": e.to_dict()
}
except Exception as e:
return {
"success": False,
"error": {
"code": "INTERNAL_ERROR",
"message": str(e)
}
}