mirror of
https://gitee.com/houhuan/TrendRadar.git
synced 2025-12-21 15:57:16 +08:00
v1.2.0
This commit is contained in:
parent
44a3f9dc47
commit
293c659673
719
main.py
719
main.py
@ -13,18 +13,26 @@ import requests
|
|||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
CONFIG = {
|
CONFIG = {
|
||||||
"VERSION": "1.1.0",
|
"VERSION": "1.2.0",
|
||||||
"VERSION_CHECK_URL": "https://raw.githubusercontent.com/sansan0/TrendRadar/refs/heads/master/version",
|
"VERSION_CHECK_URL": "https://raw.githubusercontent.com/sansan0/TrendRadar/refs/heads/master/version",
|
||||||
"FEISHU_SHOW_VERSION_UPDATE": True, # 控制显示版本更新提示,改成 False 将不接受新版本提示
|
"SHOW_VERSION_UPDATE": True, # 控制显示版本更新提示,改成 False 将不接受新版本提示
|
||||||
"FEISHU_SEPARATOR": "━━━━━━━━━━━━━━━━━━━", # 飞书消息分割线,注意,其它类型的分割线可能会被飞书过滤而不显示
|
"FEISHU_MESSAGE_SEPARATOR": "━━━━━━━━━━━━━━━━━━━", # feishu消息分割线
|
||||||
"REQUEST_INTERVAL": 1000, # 请求间隔(毫秒)
|
"REQUEST_INTERVAL": 1000, # 请求间隔(毫秒)
|
||||||
"FEISHU_REPORT_TYPE": "daily", # 飞书报告类型: "current"|"daily"|"both"
|
"REPORT_TYPE": "daily", # 报告类型: "current"|"daily"|"both"
|
||||||
"RANK_THRESHOLD": 5, # 排名高亮阈值
|
"RANK_THRESHOLD": 5, # 排名高亮阈值
|
||||||
"USE_PROXY": True, # 是否启用代理
|
"USE_PROXY": True, # 是否启用代理
|
||||||
"DEFAULT_PROXY": "http://127.0.0.1:10086",
|
"DEFAULT_PROXY": "http://127.0.0.1:10086",
|
||||||
"CONTINUE_WITHOUT_FEISHU": True, # 控制在没有飞书 webhook URL 时是否继续执行爬虫, 如果 True ,会依然进行爬虫行为,并在 github 上持续的生成爬取的新闻数据
|
"CONTINUE_WITHOUT_WEBHOOK": True, # 控制在没有webhook URL时是否继续执行爬虫
|
||||||
"FEISHU_WEBHOOK_URL": "", # 飞书机器人的 webhook URL,大概长这样:https://www.feishu.cn/flow/api/trigger-webhook/xxxx, 默认为空,推荐通过GitHub Secrets设置
|
# 飞书机器人的 webhook URL
|
||||||
# 用于让关注度更高的新闻在更前面显示,这里是权重排序配置,合起来是 1就行(你可以微调,虽然我不建议动嘿嘿)
|
"FEISHU_WEBHOOK_URL": "",
|
||||||
|
# 钉钉机器人的 webhook URL
|
||||||
|
"DINGTALK_WEBHOOK_URL": "",
|
||||||
|
# 企业微信机器人的 webhook URL
|
||||||
|
"WEWORK_WEBHOOK_URL": "",
|
||||||
|
# Telegram 要填两个
|
||||||
|
"TELEGRAM_BOT_TOKEN": "",
|
||||||
|
"TELEGRAM_CHAT_ID": "",
|
||||||
|
# 用于让关注度更高的新闻在更前面显示,这里是权重排序配置,合起来是 1 就行
|
||||||
"WEIGHT_CONFIG": {
|
"WEIGHT_CONFIG": {
|
||||||
"RANK_WEIGHT": 0.6, # 排名
|
"RANK_WEIGHT": 0.6, # 排名
|
||||||
"FREQUENCY_WEIGHT": 0.3, # 频次
|
"FREQUENCY_WEIGHT": 0.3, # 频次
|
||||||
@ -907,9 +915,21 @@ class StatisticsCalculator:
|
|||||||
if format_type == "html":
|
if format_type == "html":
|
||||||
highlight_start = "<font color='red'><strong>"
|
highlight_start = "<font color='red'><strong>"
|
||||||
highlight_end = "</strong></font>"
|
highlight_end = "</strong></font>"
|
||||||
else: # feishu
|
elif format_type == "feishu":
|
||||||
highlight_start = "<font color='red'>**"
|
highlight_start = "<font color='red'>**"
|
||||||
highlight_end = "**</font>"
|
highlight_end = "**</font>"
|
||||||
|
elif format_type == "dingtalk":
|
||||||
|
highlight_start = "**"
|
||||||
|
highlight_end = "**"
|
||||||
|
elif format_type == "wework":
|
||||||
|
highlight_start = "**"
|
||||||
|
highlight_end = "**"
|
||||||
|
elif format_type == "telegram":
|
||||||
|
highlight_start = "<b>"
|
||||||
|
highlight_end = "</b>"
|
||||||
|
else:
|
||||||
|
highlight_start = "**"
|
||||||
|
highlight_end = "**"
|
||||||
|
|
||||||
# 格式化排名显示
|
# 格式化排名显示
|
||||||
if min_rank <= rank_threshold:
|
if min_rank <= rank_threshold:
|
||||||
@ -933,6 +953,21 @@ class StatisticsCalculator:
|
|||||||
"""格式化飞书排名显示"""
|
"""格式化飞书排名显示"""
|
||||||
return StatisticsCalculator._format_rank_base(ranks, rank_threshold, "feishu")
|
return StatisticsCalculator._format_rank_base(ranks, rank_threshold, "feishu")
|
||||||
|
|
||||||
|
@staticmethod
def _format_rank_for_dingtalk(ranks: List[int], rank_threshold: int = 5) -> str:
    """Format the rank display for DingTalk messages.

    Thin wrapper that forwards ``ranks`` and ``rank_threshold`` to
    ``StatisticsCalculator._format_rank_base`` with the ``"dingtalk"``
    format type and returns its result unchanged.
    """
    dingtalk_rank_text = StatisticsCalculator._format_rank_base(
        ranks, rank_threshold, "dingtalk"
    )
    return dingtalk_rank_text
|
||||||
|
|
||||||
|
@staticmethod
def _format_rank_for_wework(ranks: List[int], rank_threshold: int = 5) -> str:
    """Format the rank display for WeCom (WeWork) messages.

    Thin wrapper that forwards ``ranks`` and ``rank_threshold`` to
    ``StatisticsCalculator._format_rank_base`` with the ``"wework"``
    format type and returns its result unchanged.
    """
    wework_rank_text = StatisticsCalculator._format_rank_base(
        ranks, rank_threshold, "wework"
    )
    return wework_rank_text
|
||||||
|
|
||||||
|
@staticmethod
def _format_rank_for_telegram(ranks: List[int], rank_threshold: int = 5) -> str:
    """Format the rank display for Telegram messages.

    Thin wrapper that forwards ``ranks`` and ``rank_threshold`` to
    ``StatisticsCalculator._format_rank_base`` with the ``"telegram"``
    format type and returns its result unchanged.
    """
    telegram_rank_text = StatisticsCalculator._format_rank_base(
        ranks, rank_threshold, "telegram"
    )
    return telegram_rank_text
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _format_time_display(first_time: str, last_time: str) -> str:
|
def _format_time_display(first_time: str, last_time: str) -> str:
|
||||||
"""格式化时间显示"""
|
"""格式化时间显示"""
|
||||||
@ -1327,6 +1362,93 @@ class ReportGenerator:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@staticmethod
def _format_title_dingtalk(title_data: Dict, show_source: bool = True) -> str:
    """Format a single news title as a DingTalk Markdown line.

    Args:
        title_data: Mapping read with the keys ``ranks``, ``rank_threshold``,
            ``mobile_url``, ``url``, ``title``, ``is_new``, ``source_alias``,
            ``time_display`` and ``count``.
        show_source: When true, the line is prefixed with ``[source_alias]``.

    Returns:
        The title (as a Markdown link when a URL is available) followed by
        the optional rank, time range and occurrence count.
    """
    rank_text = StatisticsCalculator._format_rank_for_dingtalk(
        title_data["ranks"], title_data["rank_threshold"]
    )

    # The mobile URL wins; the regular URL is only the fallback.
    target = title_data["mobile_url"] or title_data["url"]
    headline = title_data["title"]
    if target:
        headline = f"[{headline}]({target})"

    badge = "🆕 " if title_data["is_new"] else ""

    pieces = []
    if show_source:
        pieces.append(f"[{title_data['source_alias']}] ")
    pieces.append(f"{badge}{headline}")

    if rank_text:
        pieces.append(f" {rank_text}")
    if title_data["time_display"]:
        pieces.append(f" - {title_data['time_display']}")
    if title_data["count"] > 1:
        pieces.append(f" ({title_data['count']}次)")

    return "".join(pieces)
|
||||||
|
|
||||||
|
@staticmethod
def _format_title_wework(title_data: Dict, show_source: bool = True) -> str:
    """Format a single news title as a WeCom (WeWork) Markdown line.

    Args:
        title_data: Mapping read with the keys ``ranks``, ``rank_threshold``,
            ``mobile_url``, ``url``, ``title``, ``is_new``, ``source_alias``,
            ``time_display`` and ``count``.
        show_source: When true, the line is prefixed with ``[source_alias]``.

    Returns:
        The title (as a Markdown link when a URL is available) followed by
        the optional rank, time range and occurrence count.
    """
    rank_suffix = StatisticsCalculator._format_rank_for_wework(
        title_data["ranks"], title_data["rank_threshold"]
    )

    # Prefer the mobile URL and fall back to the desktop one.
    href = title_data["mobile_url"] or title_data["url"]
    label = f"[{title_data['title']}]({href})" if href else title_data["title"]

    new_marker = "🆕 " if title_data["is_new"] else ""
    source_tag = f"[{title_data['source_alias']}] " if show_source else ""

    line = f"{source_tag}{new_marker}{label}"

    if rank_suffix:
        line = f"{line} {rank_suffix}"
    if title_data["time_display"]:
        line = f"{line} - {title_data['time_display']}"
    if title_data["count"] > 1:
        line = f"{line} ({title_data['count']}次)"

    return line
|
||||||
|
|
||||||
|
@staticmethod
def _format_title_telegram(title_data: Dict, show_source: bool = True) -> str:
    """Format a single news title as a Telegram HTML line.

    Args:
        title_data: Mapping read with the keys ``ranks``, ``rank_threshold``,
            ``mobile_url``, ``url``, ``title``, ``is_new``, ``source_alias``,
            ``time_display`` and ``count``.
        show_source: When true, the line is prefixed with ``[source_alias]``.

    Returns:
        The title (wrapped in an ``<a>`` tag when a URL is available)
        followed by the optional rank, time range and occurrence count.
    """
    rank_display = StatisticsCalculator._format_rank_for_telegram(
        title_data["ranks"], title_data["rank_threshold"]
    )

    # Escape the title on BOTH paths. Previously only the linked form was
    # escaped, so an unlinked title containing "<" or "&" produced invalid
    # HTML and the whole message was rejected by Telegram's HTML parser.
    safe_title = ReportGenerator._html_escape(title_data["title"])

    link_url = title_data["mobile_url"] or title_data["url"]
    if link_url:
        formatted_title = f'<a href="{link_url}">{safe_title}</a>'
    else:
        formatted_title = safe_title

    title_prefix = "🆕 " if title_data["is_new"] else ""

    if show_source:
        # The source alias is free text as well, so it gets the same escaping.
        safe_alias = ReportGenerator._html_escape(title_data["source_alias"])
        result = f"[{safe_alias}] {title_prefix}{formatted_title}"
    else:
        result = f"{title_prefix}{formatted_title}"

    if rank_display:
        result += f" {rank_display}"
    if title_data["time_display"]:
        result += f" <code>- {title_data['time_display']}</code>"
    if title_data["count"] > 1:
        result += f" <code>({title_data['count']}次)</code>"

    return result
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _render_feishu_content(
|
def _render_feishu_content(
|
||||||
report_data: Dict, update_info: Optional[Dict] = None
|
report_data: Dict, update_info: Optional[Dict] = None
|
||||||
@ -1363,7 +1485,7 @@ class ReportGenerator:
|
|||||||
text_content += "\n"
|
text_content += "\n"
|
||||||
|
|
||||||
if i < len(report_data["stats"]) - 1:
|
if i < len(report_data["stats"]) - 1:
|
||||||
text_content += f"\n{CONFIG['FEISHU_SEPARATOR']}\n\n"
|
text_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n"
|
||||||
|
|
||||||
if not text_content:
|
if not text_content:
|
||||||
text_content = "📭 暂无匹配的热点词汇\n\n"
|
text_content = "📭 暂无匹配的热点词汇\n\n"
|
||||||
@ -1371,7 +1493,7 @@ class ReportGenerator:
|
|||||||
# 渲染新增新闻部分
|
# 渲染新增新闻部分
|
||||||
if report_data["new_titles"]:
|
if report_data["new_titles"]:
|
||||||
if text_content and "暂无匹配" not in text_content:
|
if text_content and "暂无匹配" not in text_content:
|
||||||
text_content += f"\n{CONFIG['FEISHU_SEPARATOR']}\n\n"
|
text_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n"
|
||||||
|
|
||||||
text_content += (
|
text_content += (
|
||||||
f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
|
f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
|
||||||
@ -1393,7 +1515,7 @@ class ReportGenerator:
|
|||||||
# 渲染失败平台
|
# 渲染失败平台
|
||||||
if report_data["failed_ids"]:
|
if report_data["failed_ids"]:
|
||||||
if text_content and "暂无匹配" not in text_content:
|
if text_content and "暂无匹配" not in text_content:
|
||||||
text_content += f"\n{CONFIG['FEISHU_SEPARATOR']}\n\n"
|
text_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n"
|
||||||
|
|
||||||
text_content += "⚠️ **数据获取失败的平台:**\n\n"
|
text_content += "⚠️ **数据获取失败的平台:**\n\n"
|
||||||
for i, id_value in enumerate(report_data["failed_ids"], 1):
|
for i, id_value in enumerate(report_data["failed_ids"], 1):
|
||||||
@ -1410,31 +1532,380 @@ class ReportGenerator:
|
|||||||
return text_content
|
return text_content
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def send_to_feishu(
|
def _render_dingtalk_content(
    report_data: Dict, update_info: Optional[Dict] = None
) -> str:
    """Render the report as DingTalk Markdown text.

    Args:
        report_data: Mapping read with the keys ``stats``, ``new_titles``,
            ``total_new_count`` and ``failed_ids``.
        update_info: Optional mapping with ``remote_version`` and
            ``current_version``; when truthy a version notice is appended.

    Returns:
        The complete Markdown body: header, keyword statistics, newly added
        titles, failed platforms, timestamp and optional version notice.
    """
    section_break = "\n---\n\n"

    stats = report_data["stats"]
    # Only keyword groups that actually matched contribute to the total.
    total_titles = sum(len(stat["titles"]) for stat in stats if stat["count"] > 0)
    now = TimeHelper.get_beijing_time()
    timestamp = now.strftime('%Y-%m-%d %H:%M:%S')

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        f"**总新闻数:** {total_titles}\n\n",
        f"**时间:** {timestamp}\n\n",
        "**类型:** 热点分析报告\n\n",
        "---\n\n",
    ]

    # Explicit flag instead of searching the rendered text for "暂无匹配":
    # the old substring check was fooled by any keyword or title that
    # happened to contain that phrase, which silently dropped separators.
    has_stats = bool(stats)
    if has_stats:
        parts.append("📊 **热点词汇统计**\n\n")

    total_count = len(stats)
    for index, stat in enumerate(stats):
        word = stat["word"]
        count = stat["count"]
        sequence_display = f"[{index + 1}/{total_count}]"

        if count >= 10:
            parts.append(f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n")
        elif count >= 5:
            parts.append(f"📈 {sequence_display} **{word}** : **{count}** 条\n\n")
        else:
            parts.append(f"📌 {sequence_display} **{word}** : {count} 条\n\n")

        titles = stat["titles"]
        for j, title_data in enumerate(titles, 1):
            formatted_title = ReportGenerator._format_title_dingtalk(
                title_data, show_source=True
            )
            parts.append(f" {j}. {formatted_title}\n")
            # Blank line between entries, but not after the last one.
            if j < len(titles):
                parts.append("\n")

        if index < total_count - 1:
            parts.append(section_break)

    if not has_stats:
        parts.append("📭 暂无匹配的热点词汇\n\n")

    # Newly added titles, grouped by source.
    if report_data["new_titles"]:
        if has_stats:
            parts.append(section_break)

        parts.append(
            f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
        )

        for source_data in report_data["new_titles"]:
            parts.append(
                f"**{source_data['source_alias']}** ({len(source_data['titles'])} 条):\n\n"
            )

            for j, title_data in enumerate(source_data["titles"], 1):
                # Work on a copy so the caller's data keeps its is_new flag;
                # the "new" badge is redundant inside this section.
                title_data_copy = title_data.copy()
                title_data_copy["is_new"] = False
                formatted_title = ReportGenerator._format_title_dingtalk(
                    title_data_copy, show_source=False
                )
                parts.append(f" {j}. {formatted_title}\n")

            parts.append("\n")

    # Platforms whose data could not be fetched.
    if report_data["failed_ids"]:
        if has_stats:
            parts.append(section_break)

        parts.append("⚠️ **数据获取失败的平台:**\n\n")
        parts.extend(f" • **{id_value}**\n" for id_value in report_data["failed_ids"])

    parts.append(f"\n\n> 更新时间:{timestamp}")

    if update_info:
        parts.append(
            f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
        )

    return "".join(parts)
|
||||||
|
|
||||||
|
@staticmethod
def _render_wework_content(
    report_data: Dict, update_info: Optional[Dict] = None
) -> str:
    """Render the report as WeCom (WeWork) Markdown text.

    Args:
        report_data: Mapping read with the keys ``stats``, ``new_titles``,
            ``total_new_count`` and ``failed_ids``.
        update_info: Optional mapping with ``remote_version`` and
            ``current_version``; when truthy a version notice is appended.

    Returns:
        The complete Markdown body: header, keyword statistics, newly added
        titles, failed platforms, timestamp and optional version notice.
    """
    # This renderer separates sections with blank lines only.
    section_break = "\n\n\n\n"

    stats = report_data["stats"]
    # Only keyword groups that actually matched contribute to the total.
    total_titles = sum(len(stat["titles"]) for stat in stats if stat["count"] > 0)
    now = TimeHelper.get_beijing_time()
    timestamp = now.strftime('%Y-%m-%d %H:%M:%S')

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        f"**总新闻数:** {total_titles}\n\n",
        f"**时间:** {timestamp}\n\n",
        "**类型:** 热点分析报告\n\n\n\n",
    ]

    # Explicit flag instead of searching the rendered text for "暂无匹配":
    # the old substring check was fooled by any keyword or title that
    # happened to contain that phrase, which silently dropped separators.
    has_stats = bool(stats)
    if has_stats:
        parts.append("📊 **热点词汇统计**\n\n")

    total_count = len(stats)
    for index, stat in enumerate(stats):
        word = stat["word"]
        count = stat["count"]
        sequence_display = f"[{index + 1}/{total_count}]"

        if count >= 10:
            parts.append(f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n")
        elif count >= 5:
            parts.append(f"📈 {sequence_display} **{word}** : **{count}** 条\n\n")
        else:
            parts.append(f"📌 {sequence_display} **{word}** : {count} 条\n\n")

        titles = stat["titles"]
        for j, title_data in enumerate(titles, 1):
            formatted_title = ReportGenerator._format_title_wework(
                title_data, show_source=True
            )
            parts.append(f" {j}. {formatted_title}\n")
            # Blank line between entries, but not after the last one.
            if j < len(titles):
                parts.append("\n")

        if index < total_count - 1:
            parts.append(section_break)

    if not has_stats:
        parts.append("📭 暂无匹配的热点词汇\n\n")

    # Newly added titles, grouped by source.
    if report_data["new_titles"]:
        if has_stats:
            parts.append(section_break)

        parts.append(
            f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
        )

        for source_data in report_data["new_titles"]:
            parts.append(
                f"**{source_data['source_alias']}** ({len(source_data['titles'])} 条):\n\n"
            )

            for j, title_data in enumerate(source_data["titles"], 1):
                # Work on a copy so the caller's data keeps its is_new flag;
                # the "new" badge is redundant inside this section.
                title_data_copy = title_data.copy()
                title_data_copy["is_new"] = False
                formatted_title = ReportGenerator._format_title_wework(
                    title_data_copy, show_source=False
                )
                parts.append(f" {j}. {formatted_title}\n")

            parts.append("\n")

    # Platforms whose data could not be fetched.
    if report_data["failed_ids"]:
        if has_stats:
            parts.append(section_break)

        parts.append("⚠️ **数据获取失败的平台:**\n\n")
        parts.extend(f" • {id_value}\n" for id_value in report_data["failed_ids"])

    parts.append(f"\n\n\n> 更新时间:{timestamp}")

    if update_info:
        parts.append(
            f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
        )

    return "".join(parts)
|
||||||
|
|
||||||
|
@staticmethod
def _render_telegram_content(
    report_data: Dict, update_info: Optional[Dict] = None
) -> str:
    """Render the report as Telegram HTML text.

    Args:
        report_data: Mapping read with the keys ``stats``, ``new_titles``,
            ``total_new_count`` and ``failed_ids``.
        update_info: Optional mapping with ``remote_version`` and
            ``current_version``; when truthy a version notice is appended.

    Returns:
        The complete HTML body: header, keyword statistics, newly added
        titles, failed platforms, timestamp and optional version notice.
    """

    def esc(value) -> str:
        # Free text interpolated into HTML must be escaped, otherwise a
        # "<" or "&" in a keyword, alias or id yields markup Telegram rejects.
        return ReportGenerator._html_escape(str(value))

    rule = "━━━━━━━━━━━━━━━━━━━"
    section_break = f"\n{rule}\n\n"

    stats = report_data["stats"]
    # Only keyword groups that actually matched contribute to the total.
    total_titles = sum(len(stat["titles"]) for stat in stats if stat["count"] > 0)
    now = TimeHelper.get_beijing_time()
    timestamp = now.strftime('%Y-%m-%d %H:%M:%S')

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        f"<b>总新闻数:</b> <code>{total_titles}</code>\n",
        f"<b>时间:</b> <code>{timestamp}</code>\n",
        "<b>类型:</b> <code>热点分析报告</code>\n\n",
        f"{rule}\n\n",
    ]

    # Explicit flag instead of searching the rendered text for "暂无匹配":
    # the old substring check was fooled by any keyword or title that
    # happened to contain that phrase, which silently dropped separators.
    has_stats = bool(stats)
    if has_stats:
        parts.append("📊 <b>热点词汇统计</b>\n\n")

    total_count = len(stats)
    for index, stat in enumerate(stats):
        word = esc(stat["word"])
        count = stat["count"]
        sequence_display = f"<code>[{index + 1}/{total_count}]</code>"

        if count >= 10:
            parts.append(f"🔥 {sequence_display} <b>{word}</b> : <b>{count}</b> 条\n\n")
        elif count >= 5:
            parts.append(f"📈 {sequence_display} <b>{word}</b> : <b>{count}</b> 条\n\n")
        else:
            parts.append(f"📌 {sequence_display} <b>{word}</b> : {count} 条\n\n")

        titles = stat["titles"]
        for j, title_data in enumerate(titles, 1):
            formatted_title = ReportGenerator._format_title_telegram(
                title_data, show_source=True
            )
            parts.append(f" {j}. {formatted_title}\n")
            # Blank line between entries, but not after the last one.
            if j < len(titles):
                parts.append("\n")

        if index < total_count - 1:
            parts.append(section_break)

    if not has_stats:
        parts.append("📭 暂无匹配的热点词汇\n\n")

    # Newly added titles, grouped by source.
    if report_data["new_titles"]:
        if has_stats:
            parts.append(section_break)

        parts.append(
            f"🆕 <b>本次新增热点新闻</b> (共 {report_data['total_new_count']} 条)\n\n"
        )

        for source_data in report_data["new_titles"]:
            parts.append(
                f"<b>{esc(source_data['source_alias'])}</b> ({len(source_data['titles'])} 条):\n\n"
            )

            for j, title_data in enumerate(source_data["titles"], 1):
                # Work on a copy so the caller's data keeps its is_new flag;
                # the "new" badge is redundant inside this section.
                title_data_copy = title_data.copy()
                title_data_copy["is_new"] = False
                formatted_title = ReportGenerator._format_title_telegram(
                    title_data_copy, show_source=False
                )
                parts.append(f" {j}. {formatted_title}\n")

            parts.append("\n")

    # Platforms whose data could not be fetched.
    if report_data["failed_ids"]:
        if has_stats:
            parts.append(section_break)

        parts.append("<b>⚠️ 数据获取失败的平台:</b>\n\n")
        parts.extend(
            f" • <code>{esc(id_value)}</code>\n"
            for id_value in report_data["failed_ids"]
        )

    parts.append(f"\n\n<i>更新时间:{timestamp}</i>")

    if update_info:
        # The version strings are escaped too before entering the HTML.
        parts.append(
            f"\n<i>TrendRadar 发现新版本 <b>{esc(update_info['remote_version'])}</b>,当前 <b>{esc(update_info['current_version'])}</b></i>"
        )

    return "".join(parts)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def send_to_webhooks(
|
||||||
stats: List[Dict],
|
stats: List[Dict],
|
||||||
failed_ids: Optional[List] = None,
|
failed_ids: Optional[List] = None,
|
||||||
report_type: str = "单次爬取",
|
report_type: str = "单次爬取",
|
||||||
new_titles: Optional[Dict] = None,
|
new_titles: Optional[Dict] = None,
|
||||||
id_to_alias: Optional[Dict] = None,
|
id_to_alias: Optional[Dict] = None,
|
||||||
update_info: Optional[Dict] = None,
|
update_info: Optional[Dict] = None,
|
||||||
) -> bool:
|
proxy_url: Optional[str] = None,
|
||||||
"""发送数据到飞书"""
|
) -> Dict[str, bool]:
|
||||||
webhook_url = os.environ.get("FEISHU_WEBHOOK_URL", CONFIG["FEISHU_WEBHOOK_URL"])
|
"""发送数据到多个webhook平台"""
|
||||||
|
results = {}
|
||||||
if not webhook_url:
|
|
||||||
print(f"FEISHU_WEBHOOK_URL未设置,跳过飞书通知")
|
|
||||||
return False
|
|
||||||
|
|
||||||
headers = {"Content-Type": "application/json"}
|
|
||||||
total_titles = sum(len(stat["titles"]) for stat in stats if stat["count"] > 0)
|
|
||||||
|
|
||||||
# 数据处理层
|
# 数据处理层
|
||||||
report_data = ReportGenerator._prepare_report_data(
|
report_data = ReportGenerator._prepare_report_data(
|
||||||
stats, failed_ids, new_titles, id_to_alias
|
stats, failed_ids, new_titles, id_to_alias
|
||||||
)
|
)
|
||||||
|
|
||||||
# 渲染层
|
# 获取环境变量中的webhook配置
|
||||||
|
feishu_url = os.environ.get("FEISHU_WEBHOOK_URL", CONFIG["FEISHU_WEBHOOK_URL"])
|
||||||
|
dingtalk_url = os.environ.get(
|
||||||
|
"DINGTALK_WEBHOOK_URL", CONFIG["DINGTALK_WEBHOOK_URL"]
|
||||||
|
)
|
||||||
|
wework_url = os.environ.get("WEWORK_WEBHOOK_URL", CONFIG["WEWORK_WEBHOOK_URL"])
|
||||||
|
telegram_token = os.environ.get(
|
||||||
|
"TELEGRAM_BOT_TOKEN", CONFIG["TELEGRAM_BOT_TOKEN"]
|
||||||
|
)
|
||||||
|
telegram_chat_id = os.environ.get(
|
||||||
|
"TELEGRAM_CHAT_ID", CONFIG["TELEGRAM_CHAT_ID"]
|
||||||
|
)
|
||||||
|
|
||||||
|
update_info_to_send = update_info if CONFIG["SHOW_VERSION_UPDATE"] else None
|
||||||
|
|
||||||
|
# 发送到飞书
|
||||||
|
if feishu_url:
|
||||||
|
results["feishu"] = ReportGenerator._send_to_feishu(
|
||||||
|
feishu_url, report_data, report_type, update_info_to_send, proxy_url
|
||||||
|
)
|
||||||
|
|
||||||
|
# 发送到钉钉
|
||||||
|
if dingtalk_url:
|
||||||
|
results["dingtalk"] = ReportGenerator._send_to_dingtalk(
|
||||||
|
dingtalk_url, report_data, report_type, update_info_to_send, proxy_url
|
||||||
|
)
|
||||||
|
|
||||||
|
# 发送到企业微信
|
||||||
|
if wework_url:
|
||||||
|
results["wework"] = ReportGenerator._send_to_wework(
|
||||||
|
wework_url, report_data, report_type, update_info_to_send, proxy_url
|
||||||
|
)
|
||||||
|
|
||||||
|
# 发送到Telegram
|
||||||
|
if telegram_token and telegram_chat_id:
|
||||||
|
results["telegram"] = ReportGenerator._send_to_telegram(
|
||||||
|
telegram_token,
|
||||||
|
telegram_chat_id,
|
||||||
|
report_data,
|
||||||
|
report_type,
|
||||||
|
update_info_to_send,
|
||||||
|
proxy_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
print("未配置任何webhook URL,跳过通知发送")
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _send_to_feishu(
|
||||||
|
webhook_url: str,
|
||||||
|
report_data: Dict,
|
||||||
|
report_type: str,
|
||||||
|
update_info: Optional[Dict] = None,
|
||||||
|
proxy_url: Optional[str] = None,
|
||||||
|
) -> bool:
|
||||||
|
"""发送到飞书"""
|
||||||
|
headers = {"Content-Type": "application/json"}
|
||||||
|
|
||||||
text_content = ReportGenerator._render_feishu_content(report_data, update_info)
|
text_content = ReportGenerator._render_feishu_content(report_data, update_info)
|
||||||
|
total_titles = sum(
|
||||||
|
len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
|
||||||
|
)
|
||||||
|
|
||||||
now = TimeHelper.get_beijing_time()
|
now = TimeHelper.get_beijing_time()
|
||||||
payload = {
|
payload = {
|
||||||
@ -1447,8 +1918,14 @@ class ReportGenerator:
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
proxies = None
|
||||||
|
if proxy_url:
|
||||||
|
proxies = {"http": proxy_url, "https": proxy_url}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.post(webhook_url, headers=headers, json=payload)
|
response = requests.post(
|
||||||
|
webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
|
||||||
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
print(f"飞书通知发送成功 [{report_type}]")
|
print(f"飞书通知发送成功 [{report_type}]")
|
||||||
return True
|
return True
|
||||||
@ -1461,6 +1938,150 @@ class ReportGenerator:
|
|||||||
print(f"飞书通知发送出错 [{report_type}]:{e}")
|
print(f"飞书通知发送出错 [{report_type}]:{e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@staticmethod
def _send_to_dingtalk(
    webhook_url: str,
    report_data: Dict,
    report_type: str,
    update_info: Optional[Dict] = None,
    proxy_url: Optional[str] = None,
) -> bool:
    """Send the rendered report to a DingTalk webhook.

    Args:
        webhook_url: Webhook URL the JSON payload is POSTed to.
        report_data: Report data passed to ``_render_dingtalk_content``.
        report_type: Label used in the message title and in log output.
        update_info: Optional version info forwarded to the renderer.
        proxy_url: Optional proxy applied to both http and https traffic.

    Returns:
        True when the HTTP status is 200 and the response JSON has
        ``errcode == 0``; False otherwise, including on any exception.
    """
    markdown_text = ReportGenerator._render_dingtalk_content(
        report_data, update_info
    )

    body = {
        "msgtype": "markdown",
        "markdown": {
            "title": f"TrendRadar 热点分析报告 - {report_type}",
            "text": markdown_text,
        },
    }

    proxy_map = {"http": proxy_url, "https": proxy_url} if proxy_url else None

    try:
        reply = requests.post(
            webhook_url,
            headers={"Content-Type": "application/json"},
            json=body,
            proxies=proxy_map,
            timeout=30,
        )

        if reply.status_code != 200:
            print(
                f"钉钉通知发送失败 [{report_type}],状态码:{reply.status_code}"
            )
            return False

        # HTTP 200 alone is not success; the JSON errcode must also be 0.
        outcome = reply.json()
        if outcome.get("errcode") != 0:
            print(
                f"钉钉通知发送失败 [{report_type}],错误:{outcome.get('errmsg')}"
            )
            return False

        print(f"钉钉通知发送成功 [{report_type}]")
        return True
    except Exception as exc:
        # Best-effort notification: log the failure and report False.
        print(f"钉钉通知发送出错 [{report_type}]:{exc}")
        return False
|
||||||
|
|
||||||
|
@staticmethod
def _send_to_wework(
    webhook_url: str,
    report_data: Dict,
    report_type: str,
    update_info: Optional[Dict] = None,
    proxy_url: Optional[str] = None,
) -> bool:
    """Send the rendered report to a WeCom (WeWork) webhook.

    Args:
        webhook_url: Webhook URL the JSON payload is POSTed to.
        report_data: Report data passed to ``_render_wework_content``.
        report_type: Label used in log output.
        update_info: Optional version info forwarded to the renderer.
        proxy_url: Optional proxy applied to both http and https traffic.

    Returns:
        True when the HTTP status is 200 and the response JSON has
        ``errcode == 0``; False otherwise, including on any exception.
    """
    markdown_text = ReportGenerator._render_wework_content(report_data, update_info)

    body = {"msgtype": "markdown", "markdown": {"content": markdown_text}}

    proxy_map = {"http": proxy_url, "https": proxy_url} if proxy_url else None

    try:
        reply = requests.post(
            webhook_url,
            headers={"Content-Type": "application/json"},
            json=body,
            proxies=proxy_map,
            timeout=30,
        )

        if reply.status_code != 200:
            print(
                f"企业微信通知发送失败 [{report_type}],状态码:{reply.status_code}"
            )
            return False

        # HTTP 200 alone is not success; the JSON errcode must also be 0.
        outcome = reply.json()
        if outcome.get("errcode") != 0:
            print(
                f"企业微信通知发送失败 [{report_type}],错误:{outcome.get('errmsg')}"
            )
            return False

        print(f"企业微信通知发送成功 [{report_type}]")
        return True
    except Exception as exc:
        # Best-effort notification: log the failure and report False.
        print(f"企业微信通知发送出错 [{report_type}]:{exc}")
        return False
|
||||||
|
|
||||||
|
@staticmethod
def _send_to_telegram(
    bot_token: str,
    chat_id: str,
    report_data: Dict,
    report_type: str,
    update_info: Optional[Dict] = None,
    proxy_url: Optional[str] = None,
) -> bool:
    """Send the rendered report through the Telegram Bot API.

    Args:
        bot_token: Bot token embedded in the ``sendMessage`` endpoint URL.
        chat_id: Target chat identifier placed in the payload.
        report_data: Report data passed to ``_render_telegram_content``.
        report_type: Label used in log output.
        update_info: Optional version info forwarded to the renderer.
        proxy_url: Optional proxy applied to both http and https traffic.

    Returns:
        True when the HTTP status is 200 and the response JSON has a truthy
        ``ok`` field; False otherwise, including on any exception.
    """
    html_text = ReportGenerator._render_telegram_content(
        report_data, update_info
    )

    endpoint = f"https://api.telegram.org/bot{bot_token}/sendMessage"

    body = {
        "chat_id": chat_id,
        "text": html_text,
        "parse_mode": "HTML",
        "disable_web_page_preview": True,
    }

    proxy_map = {"http": proxy_url, "https": proxy_url} if proxy_url else None

    try:
        reply = requests.post(
            endpoint,
            headers={"Content-Type": "application/json"},
            json=body,
            proxies=proxy_map,
            timeout=30,
        )

        if reply.status_code != 200:
            print(
                f"Telegram通知发送失败 [{report_type}],状态码:{reply.status_code}"
            )
            return False

        # HTTP 200 alone is not success; the JSON "ok" field must be truthy.
        outcome = reply.json()
        if not outcome.get("ok"):
            print(
                f"Telegram通知发送失败 [{report_type}],错误:{outcome.get('description')}"
            )
            return False

        print(f"Telegram通知发送成功 [{report_type}]")
        return True
    except Exception as exc:
        # Best-effort notification: log the failure and report False.
        print(f"Telegram通知发送出错 [{report_type}]:{exc}")
        return False
|
||||||
|
|
||||||
|
|
||||||
class NewsAnalyzer:
|
class NewsAnalyzer:
|
||||||
"""新闻分析器"""
|
"""新闻分析器"""
|
||||||
@ -1468,11 +2089,11 @@ class NewsAnalyzer:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
request_interval: int = CONFIG["REQUEST_INTERVAL"],
|
request_interval: int = CONFIG["REQUEST_INTERVAL"],
|
||||||
feishu_report_type: str = CONFIG["FEISHU_REPORT_TYPE"],
|
report_type: str = CONFIG["REPORT_TYPE"],
|
||||||
rank_threshold: int = CONFIG["RANK_THRESHOLD"],
|
rank_threshold: int = CONFIG["RANK_THRESHOLD"],
|
||||||
):
|
):
|
||||||
self.request_interval = request_interval
|
self.request_interval = request_interval
|
||||||
self.feishu_report_type = feishu_report_type
|
self.report_type = report_type
|
||||||
self.rank_threshold = rank_threshold
|
self.rank_threshold = rank_threshold
|
||||||
self.is_github_actions = os.environ.get("GITHUB_ACTIONS") == "true"
|
self.is_github_actions = os.environ.get("GITHUB_ACTIONS") == "true"
|
||||||
self.update_info = None
|
self.update_info = None
|
||||||
@ -1547,17 +2168,15 @@ class NewsAnalyzer:
|
|||||||
)
|
)
|
||||||
print(f"当日HTML统计报告已生成: {html_file}")
|
print(f"当日HTML统计报告已生成: {html_file}")
|
||||||
|
|
||||||
if self.feishu_report_type in ["daily", "both"]:
|
if self.report_type in ["daily", "both"]:
|
||||||
update_info_for_feishu = (
|
ReportGenerator.send_to_webhooks(
|
||||||
self.update_info if CONFIG["FEISHU_SHOW_VERSION_UPDATE"] else None
|
|
||||||
)
|
|
||||||
ReportGenerator.send_to_feishu(
|
|
||||||
stats,
|
stats,
|
||||||
[],
|
[],
|
||||||
"当日汇总",
|
"当日汇总",
|
||||||
latest_new_titles,
|
latest_new_titles,
|
||||||
id_to_alias,
|
id_to_alias,
|
||||||
update_info_for_feishu,
|
self.update_info,
|
||||||
|
self.proxy_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
return html_file
|
return html_file
|
||||||
@ -1567,17 +2186,29 @@ class NewsAnalyzer:
|
|||||||
now = TimeHelper.get_beijing_time()
|
now = TimeHelper.get_beijing_time()
|
||||||
print(f"当前北京时间: {now.strftime('%Y-%m-%d %H:%M:%S')}")
|
print(f"当前北京时间: {now.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||||
|
|
||||||
webhook_url = os.environ.get("FEISHU_WEBHOOK_URL", CONFIG["FEISHU_WEBHOOK_URL"])
|
# 检查是否配置了任何webhook URL
|
||||||
if not webhook_url and not CONFIG["CONTINUE_WITHOUT_FEISHU"]:
|
has_webhook = any(
|
||||||
|
[
|
||||||
|
os.environ.get("FEISHU_WEBHOOK_URL", CONFIG["FEISHU_WEBHOOK_URL"]),
|
||||||
|
os.environ.get("DINGTALK_WEBHOOK_URL", CONFIG["DINGTALK_WEBHOOK_URL"]),
|
||||||
|
os.environ.get("WEWORK_WEBHOOK_URL", CONFIG["WEWORK_WEBHOOK_URL"]),
|
||||||
|
(
|
||||||
|
os.environ.get("TELEGRAM_BOT_TOKEN", CONFIG["TELEGRAM_BOT_TOKEN"])
|
||||||
|
and os.environ.get("TELEGRAM_CHAT_ID", CONFIG["TELEGRAM_CHAT_ID"])
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
if not has_webhook and not CONFIG["CONTINUE_WITHOUT_WEBHOOK"]:
|
||||||
print(
|
print(
|
||||||
"错误: FEISHU_WEBHOOK_URL未设置且CONTINUE_WITHOUT_FEISHU为False,程序退出"
|
"错误: 未配置任何webhook URL且CONTINUE_WITHOUT_WEBHOOK为False,程序退出"
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
if not webhook_url:
|
if not has_webhook:
|
||||||
print("FEISHU_WEBHOOK_URL未设置,将继续执行爬虫但不发送飞书通知")
|
print("未配置任何webhook URL,将继续执行爬虫但不发送通知")
|
||||||
|
|
||||||
print(f"飞书报告类型: {self.feishu_report_type}")
|
print(f"报告类型: {self.report_type}")
|
||||||
|
|
||||||
ids = [
|
ids = [
|
||||||
("toutiao", "今日头条"),
|
("toutiao", "今日头条"),
|
||||||
@ -1636,17 +2267,15 @@ class NewsAnalyzer:
|
|||||||
new_titles,
|
new_titles,
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.feishu_report_type in ["current", "both"]:
|
if self.report_type in ["current", "both"]:
|
||||||
update_info_for_feishu = (
|
ReportGenerator.send_to_webhooks(
|
||||||
self.update_info if CONFIG["FEISHU_SHOW_VERSION_UPDATE"] else None
|
|
||||||
)
|
|
||||||
ReportGenerator.send_to_feishu(
|
|
||||||
stats,
|
stats,
|
||||||
failed_ids,
|
failed_ids,
|
||||||
"单次爬取",
|
"单次爬取",
|
||||||
new_titles,
|
new_titles,
|
||||||
id_to_alias,
|
id_to_alias,
|
||||||
update_info_for_feishu,
|
self.update_info,
|
||||||
|
self.proxy_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
html_file = ReportGenerator.generate_html_report(
|
html_file = ReportGenerator.generate_html_report(
|
||||||
@ -1670,7 +2299,7 @@ class NewsAnalyzer:
|
|||||||
def main():
|
def main():
|
||||||
analyzer = NewsAnalyzer(
|
analyzer = NewsAnalyzer(
|
||||||
request_interval=CONFIG["REQUEST_INTERVAL"],
|
request_interval=CONFIG["REQUEST_INTERVAL"],
|
||||||
feishu_report_type=CONFIG["FEISHU_REPORT_TYPE"],
|
report_type=CONFIG["REPORT_TYPE"],
|
||||||
rank_threshold=CONFIG["RANK_THRESHOLD"],
|
rank_threshold=CONFIG["RANK_THRESHOLD"],
|
||||||
)
|
)
|
||||||
analyzer.run()
|
analyzer.run()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user