mirror of
https://gitee.com/houhuan/TrendRadar.git
synced 2026-05-01 01:12:42 +08:00
chore: 更新文档和 AI 对话效果图
- 更新至 mcp-v1.0.1 - 更新 AI 对话效果图 - 完善相关文档
This commit is contained in:
@@ -157,11 +157,10 @@ class AnalyticsTools:
|
||||
self,
|
||||
topic: str,
|
||||
analysis_type: str = "trend",
|
||||
time_range: str = "7d",
|
||||
date_range: Optional[Dict[str, str]] = None,
|
||||
granularity: str = "day",
|
||||
threshold: float = 3.0,
|
||||
time_window: int = 24,
|
||||
lookback_days: int = 7,
|
||||
lookahead_hours: int = 6,
|
||||
confidence_threshold: float = 0.7
|
||||
) -> Dict:
|
||||
@@ -175,11 +174,12 @@ class AnalyticsTools:
|
||||
- "lifecycle": 生命周期分析(从出现到消失的完整周期)
|
||||
- "viral": 异常热度检测(识别突然爆火的话题)
|
||||
- "predict": 话题预测(预测未来可能的热点)
|
||||
time_range: 时间范围(trend模式),默认"7d"(7d/24h/1w/1m/2m)
|
||||
date_range: 日期范围(trend和lifecycle模式),可选
|
||||
- **格式**: {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}
|
||||
- **默认**: 不指定时默认分析最近7天
|
||||
granularity: 时间粒度(trend模式),默认"day"(hour/day)
|
||||
threshold: 热度突增倍数阈值(viral模式),默认3.0
|
||||
time_window: 检测时间窗口小时数(viral模式),默认24
|
||||
lookback_days: 回溯天数(lifecycle模式),默认7
|
||||
lookahead_hours: 预测未来小时数(predict模式),默认6
|
||||
confidence_threshold: 置信度阈值(predict模式),默认0.7
|
||||
|
||||
@@ -187,8 +187,8 @@ class AnalyticsTools:
|
||||
趋势分析结果字典
|
||||
|
||||
Examples:
|
||||
- analyze_topic_trend_unified(topic="人工智能", analysis_type="trend", time_range="7d")
|
||||
- analyze_topic_trend_unified(topic="特斯拉", analysis_type="lifecycle", lookback_days=7)
|
||||
- analyze_topic_trend_unified(topic="人工智能", analysis_type="trend", date_range={"start": "2025-10-18", "end": "2025-10-25"})
|
||||
- analyze_topic_trend_unified(topic="特斯拉", analysis_type="lifecycle", date_range={"start": "2025-10-18", "end": "2025-10-25"})
|
||||
- analyze_topic_trend_unified(topic="比特币", analysis_type="viral", threshold=3.0)
|
||||
- analyze_topic_trend_unified(topic="ChatGPT", analysis_type="predict", lookahead_hours=6)
|
||||
"""
|
||||
@@ -206,13 +206,13 @@ class AnalyticsTools:
|
||||
if analysis_type == "trend":
|
||||
return self.get_topic_trend_analysis(
|
||||
topic=topic,
|
||||
time_range=time_range,
|
||||
date_range=date_range,
|
||||
granularity=granularity
|
||||
)
|
||||
elif analysis_type == "lifecycle":
|
||||
return self.analyze_topic_lifecycle(
|
||||
topic=topic,
|
||||
lookback_days=lookback_days
|
||||
date_range=date_range
|
||||
)
|
||||
elif analysis_type == "viral":
|
||||
# viral模式不需要topic参数,使用通用检测
|
||||
@@ -244,7 +244,7 @@ class AnalyticsTools:
|
||||
def get_topic_trend_analysis(
|
||||
self,
|
||||
topic: str,
|
||||
time_range: str = "7d",
|
||||
date_range: Optional[Dict[str, str]] = None,
|
||||
granularity: str = "day"
|
||||
) -> Dict:
|
||||
"""
|
||||
@@ -252,7 +252,9 @@ class AnalyticsTools:
|
||||
|
||||
Args:
|
||||
topic: 话题关键词
|
||||
time_range: 时间范围,格式:7d(7天)、24h(24小时)、1w(1周)、1m(1个月)、2m(2个月)
|
||||
date_range: 日期范围(可选)
|
||||
- **格式**: {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}
|
||||
- **默认**: 不指定时默认分析最近7天
|
||||
granularity: 时间粒度,仅支持 day(天)
|
||||
|
||||
Returns:
|
||||
@@ -264,20 +266,20 @@ class AnalyticsTools:
|
||||
- "查看'比特币'过去一周的热度变化"
|
||||
- "看看'iPhone'最近7天的趋势如何"
|
||||
- "分析'特斯拉'最近一个月的热度趋势"
|
||||
- "查看'ChatGPT'过去2个月的趋势变化"
|
||||
- "查看'ChatGPT'2024年12月的趋势变化"
|
||||
|
||||
代码调用示例:
|
||||
>>> tools = AnalyticsTools()
|
||||
>>> # 分析7天趋势
|
||||
>>> result = tools.get_topic_trend_analysis(
|
||||
... topic="人工智能",
|
||||
... time_range="7d",
|
||||
... date_range={"start": "2025-10-18", "end": "2025-10-25"},
|
||||
... granularity="day"
|
||||
... )
|
||||
>>> # 分析1个月趋势
|
||||
>>> # 分析历史月份趋势
|
||||
>>> result = tools.get_topic_trend_analysis(
|
||||
... topic="特斯拉",
|
||||
... time_range="1m",
|
||||
... date_range={"start": "2024-12-01", "end": "2024-12-31"},
|
||||
... granularity="day"
|
||||
... )
|
||||
>>> print(result['trend_data'])
|
||||
@@ -294,15 +296,21 @@ class AnalyticsTools:
|
||||
suggestion="当前仅支持 'day' 粒度,因为底层数据按天聚合"
|
||||
)
|
||||
|
||||
# 解析时间范围
|
||||
days = self._parse_time_range(time_range)
|
||||
# 处理日期范围(不指定时默认最近7天)
|
||||
if date_range:
|
||||
from ..utils.validators import validate_date_range
|
||||
date_range_tuple = validate_date_range(date_range)
|
||||
start_date, end_date = date_range_tuple
|
||||
else:
|
||||
# 默认最近7天
|
||||
end_date = datetime.now()
|
||||
start_date = end_date - timedelta(days=6)
|
||||
|
||||
# 收集趋势数据
|
||||
trend_data = []
|
||||
start_date = datetime.now() - timedelta(days=days)
|
||||
current_date = start_date
|
||||
|
||||
while current_date <= datetime.now():
|
||||
while current_date <= end_date:
|
||||
try:
|
||||
all_titles, _, _ = self.data_service.parser.read_all_titles_for_date(
|
||||
date=current_date
|
||||
@@ -336,6 +344,7 @@ class AnalyticsTools:
|
||||
|
||||
# 计算趋势指标
|
||||
counts = [item["count"] for item in trend_data]
|
||||
total_days = (end_date - start_date).days + 1
|
||||
|
||||
if len(counts) >= 2:
|
||||
# 计算涨跌幅度
|
||||
@@ -359,7 +368,11 @@ class AnalyticsTools:
|
||||
return {
|
||||
"success": True,
|
||||
"topic": topic,
|
||||
"time_range": time_range,
|
||||
"date_range": {
|
||||
"start": start_date.strftime("%Y-%m-%d"),
|
||||
"end": end_date.strftime("%Y-%m-%d"),
|
||||
"total_days": total_days
|
||||
},
|
||||
"granularity": granularity,
|
||||
"trend_data": trend_data,
|
||||
"statistics": {
|
||||
@@ -1452,14 +1465,16 @@ class AnalyticsTools:
|
||||
def analyze_topic_lifecycle(
|
||||
self,
|
||||
topic: str,
|
||||
lookback_days: int = 7
|
||||
date_range: Optional[Dict[str, str]] = None
|
||||
) -> Dict:
|
||||
"""
|
||||
话题生命周期分析 - 追踪话题从出现到消失的完整周期
|
||||
|
||||
Args:
|
||||
topic: 话题关键词
|
||||
lookback_days: 回溯天数
|
||||
date_range: 日期范围(可选)
|
||||
- **格式**: {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}
|
||||
- **默认**: 不指定时默认分析最近7天
|
||||
|
||||
Returns:
|
||||
话题生命周期分析结果
|
||||
@@ -1474,21 +1489,28 @@ class AnalyticsTools:
|
||||
>>> tools = AnalyticsTools()
|
||||
>>> result = tools.analyze_topic_lifecycle(
|
||||
... topic="人工智能",
|
||||
... lookback_days=7
|
||||
... date_range={"start": "2025-10-18", "end": "2025-10-25"}
|
||||
... )
|
||||
>>> print(result['lifecycle_stage'])
|
||||
"""
|
||||
try:
|
||||
# 参数验证
|
||||
topic = validate_keyword(topic)
|
||||
lookback_days = validate_limit(lookback_days, default=7, max_limit=30)
|
||||
|
||||
# 处理日期范围(不指定时默认最近7天)
|
||||
if date_range:
|
||||
from ..utils.validators import validate_date_range
|
||||
date_range_tuple = validate_date_range(date_range)
|
||||
start_date, end_date = date_range_tuple
|
||||
else:
|
||||
# 默认最近7天
|
||||
end_date = datetime.now()
|
||||
start_date = end_date - timedelta(days=6)
|
||||
|
||||
# 收集话题历史数据
|
||||
lifecycle_data = []
|
||||
start_date = datetime.now() - timedelta(days=lookback_days)
|
||||
|
||||
current_date = start_date
|
||||
while current_date <= datetime.now():
|
||||
while current_date <= end_date:
|
||||
try:
|
||||
all_titles, _, _ = self.data_service.parser.read_all_titles_for_date(
|
||||
date=current_date
|
||||
@@ -1514,12 +1536,16 @@ class AnalyticsTools:
|
||||
|
||||
current_date += timedelta(days=1)
|
||||
|
||||
# 计算分析天数
|
||||
total_days = (end_date - start_date).days + 1
|
||||
|
||||
# 分析生命周期阶段
|
||||
counts = [item["count"] for item in lifecycle_data]
|
||||
|
||||
if not any(counts):
|
||||
time_desc = f"{start_date.strftime('%Y-%m-%d')} 至 {end_date.strftime('%Y-%m-%d')}"
|
||||
raise DataNotFoundError(
|
||||
f"在过去 {lookback_days} 天内未找到话题 '{topic}'",
|
||||
f"在 {time_desc} 内未找到话题 '{topic}'",
|
||||
suggestion="请尝试其他话题或扩大时间范围"
|
||||
)
|
||||
|
||||
@@ -1554,7 +1580,7 @@ class AnalyticsTools:
|
||||
|
||||
if active_days <= 2 and max_count > avg_count * 2:
|
||||
topic_type = "昙花一现"
|
||||
elif active_days >= lookback_days * 0.6:
|
||||
elif active_days >= total_days * 0.6:
|
||||
topic_type = "持续热点"
|
||||
else:
|
||||
topic_type = "周期性热点"
|
||||
@@ -1562,7 +1588,11 @@ class AnalyticsTools:
|
||||
return {
|
||||
"success": True,
|
||||
"topic": topic,
|
||||
"lookback_days": lookback_days,
|
||||
"date_range": {
|
||||
"start": start_date.strftime("%Y-%m-%d"),
|
||||
"end": end_date.strftime("%Y-%m-%d"),
|
||||
"total_days": total_days
|
||||
},
|
||||
"lifecycle_data": lifecycle_data,
|
||||
"analysis": {
|
||||
"first_appearance": first_appearance,
|
||||
@@ -1890,29 +1920,6 @@ class AnalyticsTools:
|
||||
|
||||
# ==================== 辅助方法 ====================
|
||||
|
||||
def _parse_time_range(self, time_range: str) -> int:
|
||||
"""解析时间范围字符串为天数"""
|
||||
match = re.match(r'(\d+)([dhwm])', time_range.lower())
|
||||
if not match:
|
||||
raise InvalidParameterError(
|
||||
f"无效的时间范围格式: {time_range}",
|
||||
suggestion="格式示例:7d(7天)、24h(24小时)、1w(1周)、1m(1个月)、2m(2个月)"
|
||||
)
|
||||
|
||||
value = int(match.group(1))
|
||||
unit = match.group(2)
|
||||
|
||||
if unit == 'h':
|
||||
return max(1, value // 24) # 转换为天数
|
||||
elif unit == 'd':
|
||||
return value
|
||||
elif unit == 'w':
|
||||
return value * 7
|
||||
elif unit == 'm':
|
||||
return value * 30 # 1个月按30天计算
|
||||
|
||||
return value
|
||||
|
||||
def _extract_keywords(self, title: str, min_length: int = 2) -> List[str]:
|
||||
"""
|
||||
从标题中提取关键词(简单实现)
|
||||
|
||||
@@ -55,8 +55,11 @@ class SearchTools:
|
||||
- "keyword": 精确关键词匹配(默认)
|
||||
- "fuzzy": 模糊内容匹配(使用相似度算法)
|
||||
- "entity": 实体名称搜索(自动按权重排序)
|
||||
date_range: 日期范围,格式: {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}
|
||||
不指定则默认查询今天
|
||||
date_range: 日期范围(可选)
|
||||
- **格式**: {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}
|
||||
- **示例**: {"start": "2025-01-01", "end": "2025-01-07"}
|
||||
- **默认**: 不指定时默认查询今天
|
||||
- **注意**: start和end可以相同(表示单日查询)
|
||||
platforms: 平台过滤列表,如 ['zhihu', 'weibo']
|
||||
limit: 返回条数限制,默认50
|
||||
sort_by: 排序方式,可选值:
|
||||
@@ -73,7 +76,7 @@ class SearchTools:
|
||||
- search_news_unified(query="人工智能", search_mode="keyword")
|
||||
- search_news_unified(query="特斯拉降价", search_mode="fuzzy", threshold=0.4)
|
||||
- search_news_unified(query="马斯克", search_mode="entity", limit=20)
|
||||
- search_news_unified(query="iPhone 16发布", search_mode="keyword")
|
||||
- search_news_unified(query="iPhone 16", date_range={"start": "2025-01-01", "end": "2025-01-07"})
|
||||
"""
|
||||
try:
|
||||
# 参数验证
|
||||
@@ -100,8 +103,22 @@ class SearchTools:
|
||||
date_range_tuple = validate_date_range(date_range)
|
||||
start_date, end_date = date_range_tuple
|
||||
else:
|
||||
# 默认今天
|
||||
start_date = end_date = datetime.now()
|
||||
# 不指定日期时,使用最新可用数据日期(而非 datetime.now())
|
||||
earliest, latest = self.data_service.get_available_date_range()
|
||||
|
||||
if latest is None:
|
||||
# 没有任何可用数据
|
||||
return {
|
||||
"success": False,
|
||||
"error": {
|
||||
"code": "NO_DATA_AVAILABLE",
|
||||
"message": "output 目录下没有可用的新闻数据",
|
||||
"suggestion": "请先运行爬虫生成数据,或检查 output 目录"
|
||||
}
|
||||
}
|
||||
|
||||
# 使用最新可用日期
|
||||
start_date = end_date = latest
|
||||
|
||||
# 收集所有匹配的新闻
|
||||
all_matches = []
|
||||
@@ -137,16 +154,34 @@ class SearchTools:
|
||||
current_date += timedelta(days=1)
|
||||
|
||||
if not all_matches:
|
||||
time_desc = "今天" if start_date == end_date else f"{start_date.strftime('%Y-%m-%d')} 至 {end_date.strftime('%Y-%m-%d')}"
|
||||
return {
|
||||
# 获取可用日期范围用于错误提示
|
||||
earliest, latest = self.data_service.get_available_date_range()
|
||||
|
||||
# 判断时间范围描述
|
||||
if start_date.date() == datetime.now().date() and start_date == end_date:
|
||||
time_desc = "今天"
|
||||
elif start_date == end_date:
|
||||
time_desc = start_date.strftime("%Y-%m-%d")
|
||||
else:
|
||||
time_desc = f"{start_date.strftime('%Y-%m-%d')} 至 {end_date.strftime('%Y-%m-%d')}"
|
||||
|
||||
# 构建错误消息
|
||||
if earliest and latest:
|
||||
available_desc = f"{earliest.strftime('%Y-%m-%d')} 至 {latest.strftime('%Y-%m-%d')}"
|
||||
message = f"未找到匹配的新闻(查询范围: {time_desc},可用数据: {available_desc})"
|
||||
else:
|
||||
message = f"未找到匹配的新闻({time_desc})"
|
||||
|
||||
result = {
|
||||
"success": True,
|
||||
"results": [],
|
||||
"total": 0,
|
||||
"query": query,
|
||||
"search_mode": search_mode,
|
||||
"time_range": time_desc,
|
||||
"message": f"未找到匹配的新闻({time_desc})"
|
||||
"message": message
|
||||
}
|
||||
return result
|
||||
|
||||
# 统一排序逻辑
|
||||
if sort_by == "relevance":
|
||||
@@ -160,8 +195,10 @@ class SearchTools:
|
||||
# 限制返回数量
|
||||
results = all_matches[:limit]
|
||||
|
||||
# 构建时间范围描述
|
||||
if start_date == end_date:
|
||||
# 构建时间范围描述(正确判断是否为今天)
|
||||
if start_date.date() == datetime.now().date() and start_date == end_date:
|
||||
time_range_desc = "今天"
|
||||
elif start_date == end_date:
|
||||
time_range_desc = start_date.strftime("%Y-%m-%d")
|
||||
else:
|
||||
time_range_desc = f"{start_date.strftime('%Y-%m-%d')} 至 {end_date.strftime('%Y-%m-%d')}"
|
||||
@@ -457,7 +494,7 @@ class SearchTools:
|
||||
def search_related_news_history(
|
||||
self,
|
||||
reference_text: str,
|
||||
time_range: str = "yesterday",
|
||||
time_preset: str = "yesterday",
|
||||
start_date: Optional[datetime] = None,
|
||||
end_date: Optional[datetime] = None,
|
||||
threshold: float = 0.4,
|
||||
@@ -469,13 +506,13 @@ class SearchTools:
|
||||
|
||||
Args:
|
||||
reference_text: 参考新闻标题或内容
|
||||
time_range: 时间范围预设值,可选:
|
||||
time_preset: 时间范围预设值,可选:
|
||||
- "yesterday": 昨天
|
||||
- "last_week": 上周 (7天)
|
||||
- "last_month": 上个月 (30天)
|
||||
- "custom": 自定义日期范围(需要提供 start_date 和 end_date)
|
||||
start_date: 自定义开始日期(仅当 time_range="custom" 时有效)
|
||||
end_date: 自定义结束日期(仅当 time_range="custom" 时有效)
|
||||
start_date: 自定义开始日期(仅当 time_preset="custom" 时有效)
|
||||
end_date: 自定义结束日期(仅当 time_preset="custom" 时有效)
|
||||
threshold: 相似度阈值 (0-1之间),默认0.4
|
||||
limit: 返回条数限制,默认50
|
||||
include_url: 是否包含URL链接,默认False(节省token)
|
||||
@@ -487,7 +524,7 @@ class SearchTools:
|
||||
>>> tools = SearchTools()
|
||||
>>> result = tools.search_related_news_history(
|
||||
... reference_text="人工智能技术突破",
|
||||
... time_range="last_week",
|
||||
... time_preset="last_week",
|
||||
... threshold=0.4,
|
||||
... limit=50
|
||||
... )
|
||||
@@ -503,16 +540,16 @@ class SearchTools:
|
||||
# 确定查询日期范围
|
||||
today = datetime.now()
|
||||
|
||||
if time_range == "yesterday":
|
||||
if time_preset == "yesterday":
|
||||
search_start = today - timedelta(days=1)
|
||||
search_end = today - timedelta(days=1)
|
||||
elif time_range == "last_week":
|
||||
elif time_preset == "last_week":
|
||||
search_start = today - timedelta(days=7)
|
||||
search_end = today - timedelta(days=1)
|
||||
elif time_range == "last_month":
|
||||
elif time_preset == "last_month":
|
||||
search_start = today - timedelta(days=30)
|
||||
search_end = today - timedelta(days=1)
|
||||
elif time_range == "custom":
|
||||
elif time_preset == "custom":
|
||||
if not start_date or not end_date:
|
||||
raise InvalidParameterError(
|
||||
"自定义时间范围需要提供 start_date 和 end_date",
|
||||
@@ -522,7 +559,7 @@ class SearchTools:
|
||||
search_end = end_date
|
||||
else:
|
||||
raise InvalidParameterError(
|
||||
f"不支持的时间范围: {time_range}",
|
||||
f"不支持的时间范围: {time_preset}",
|
||||
suggestion="请使用 'yesterday', 'last_week', 'last_month' 或 'custom'"
|
||||
)
|
||||
|
||||
@@ -600,7 +637,7 @@ class SearchTools:
|
||||
"results": [],
|
||||
"total": 0,
|
||||
"query": reference_text,
|
||||
"time_range": time_range,
|
||||
"time_preset": time_preset,
|
||||
"date_range": {
|
||||
"start": search_start.strftime("%Y-%m-%d"),
|
||||
"end": search_end.strftime("%Y-%m-%d")
|
||||
@@ -627,7 +664,7 @@ class SearchTools:
|
||||
"threshold": threshold,
|
||||
"reference_text": reference_text,
|
||||
"reference_keywords": reference_keywords,
|
||||
"time_range": time_range,
|
||||
"time_preset": time_preset,
|
||||
"date_range": {
|
||||
"start": search_start.strftime("%Y-%m-%d"),
|
||||
"end": search_end.strftime("%Y-%m-%d")
|
||||
|
||||
Reference in New Issue
Block a user