This commit is contained in:
sansan 2025-09-13 14:33:58 +08:00
parent 241c4c6d54
commit 47a6fba9b5
4 changed files with 133 additions and 33 deletions

View File

@ -30,6 +30,7 @@ report:
notification: notification:
enable_notification: true # 是否启用通知功能,如果 false,则不发送手机通知 enable_notification: true # 是否启用通知功能,如果 false,则不发送手机通知
message_batch_size: 4000 # 消息分批大小(字节)(这个配置别动) message_batch_size: 4000 # 消息分批大小(字节)(这个配置别动)
dingtalk_batch_size: 20000 # 钉钉消息分批大小(字节)(这个配置也别动)
batch_send_interval: 1 # 批次发送间隔(秒) batch_send_interval: 1 # 批次发送间隔(秒)
feishu_message_separator: "━━━━━━━━━━━━━━━━━━━" # feishu 消息分割线 feishu_message_separator: "━━━━━━━━━━━━━━━━━━━" # feishu 消息分割线

122
main.py
View File

@ -15,7 +15,7 @@ import requests
import yaml import yaml
VERSION = "2.1.1" VERSION = "2.1.2"
# === 配置管理 === # === 配置管理 ===
@ -43,6 +43,7 @@ def load_config():
"ENABLE_CRAWLER": config_data["crawler"]["enable_crawler"], "ENABLE_CRAWLER": config_data["crawler"]["enable_crawler"],
"ENABLE_NOTIFICATION": config_data["notification"]["enable_notification"], "ENABLE_NOTIFICATION": config_data["notification"]["enable_notification"],
"MESSAGE_BATCH_SIZE": config_data["notification"]["message_batch_size"], "MESSAGE_BATCH_SIZE": config_data["notification"]["message_batch_size"],
"DINGTALK_BATCH_SIZE": config_data["notification"].get("dingtalk_batch_size", 20000),
"BATCH_SEND_INTERVAL": config_data["notification"]["batch_send_interval"], "BATCH_SEND_INTERVAL": config_data["notification"]["batch_send_interval"],
"FEISHU_MESSAGE_SEPARATOR": config_data["notification"][ "FEISHU_MESSAGE_SEPARATOR": config_data["notification"][
"feishu_message_separator" "feishu_message_separator"
@ -2273,10 +2274,16 @@ def split_content_into_batches(
report_data: Dict, report_data: Dict,
format_type: str, format_type: str,
update_info: Optional[Dict] = None, update_info: Optional[Dict] = None,
max_bytes: int = CONFIG["MESSAGE_BATCH_SIZE"], max_bytes: int = None,
mode: str = "daily", mode: str = "daily",
) -> List[str]: ) -> List[str]:
"""分批处理消息内容,确保词组标题+至少第一条新闻的完整性""" """分批处理消息内容,确保词组标题+至少第一条新闻的完整性"""
if max_bytes is None:
if format_type == "dingtalk":
max_bytes = CONFIG.get("DINGTALK_BATCH_SIZE", 20000)
else:
max_bytes = CONFIG.get("MESSAGE_BATCH_SIZE", 4000)
batches = [] batches = []
total_titles = sum( total_titles = sum(
@ -2289,6 +2296,11 @@ def split_content_into_batches(
base_header = f"**总新闻数:** {total_titles}\n\n\n\n" base_header = f"**总新闻数:** {total_titles}\n\n\n\n"
elif format_type == "telegram": elif format_type == "telegram":
base_header = f"总新闻数: {total_titles}\n\n" base_header = f"总新闻数: {total_titles}\n\n"
elif format_type == "dingtalk":
base_header = f"**总新闻数:** {total_titles}\n\n"
base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
base_header += f"**类型:** 热点分析报告\n\n"
base_header += "---\n\n"
base_footer = "" base_footer = ""
if format_type == "wework": if format_type == "wework":
@ -2299,6 +2311,10 @@ def split_content_into_batches(
base_footer = f"\n\n更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}" base_footer = f"\n\n更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
if update_info: if update_info:
base_footer += f"\nTrendRadar 发现新版本 {update_info['remote_version']},当前 {update_info['current_version']}" base_footer += f"\nTrendRadar 发现新版本 {update_info['remote_version']},当前 {update_info['current_version']}"
elif format_type == "dingtalk":
base_footer = f"\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
if update_info:
base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
stats_header = "" stats_header = ""
if report_data["stats"]: if report_data["stats"]:
@ -2306,6 +2322,8 @@ def split_content_into_batches(
stats_header = f"📊 **热点词汇统计**\n\n" stats_header = f"📊 **热点词汇统计**\n\n"
elif format_type == "telegram": elif format_type == "telegram":
stats_header = f"📊 热点词汇统计\n\n" stats_header = f"📊 热点词汇统计\n\n"
elif format_type == "dingtalk":
stats_header = f"📊 **热点词汇统计**\n\n"
current_batch = base_header current_batch = base_header
current_batch_has_content = False current_batch_has_content = False
@ -2370,6 +2388,17 @@ def split_content_into_batches(
word_header = f"📈 {sequence_display} {word} : {count}\n\n" word_header = f"📈 {sequence_display} {word} : {count}\n\n"
else: else:
word_header = f"📌 {sequence_display} {word} : {count}\n\n" word_header = f"📌 {sequence_display} {word} : {count}\n\n"
elif format_type == "dingtalk":
if count >= 10:
word_header = (
f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
)
elif count >= 5:
word_header = (
f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
)
else:
word_header = f"📌 {sequence_display} **{word}** : {count}\n\n"
# 构建第一条新闻 # 构建第一条新闻
first_news_line = "" first_news_line = ""
@ -2383,6 +2412,10 @@ def split_content_into_batches(
formatted_title = format_title_for_platform( formatted_title = format_title_for_platform(
"telegram", first_title_data, show_source=True "telegram", first_title_data, show_source=True
) )
elif format_type == "dingtalk":
formatted_title = format_title_for_platform(
"dingtalk", first_title_data, show_source=True
)
else: else:
formatted_title = f"{first_title_data['title']}" formatted_title = f"{first_title_data['title']}"
@ -2420,6 +2453,10 @@ def split_content_into_batches(
formatted_title = format_title_for_platform( formatted_title = format_title_for_platform(
"telegram", title_data, show_source=True "telegram", title_data, show_source=True
) )
elif format_type == "dingtalk":
formatted_title = format_title_for_platform(
"dingtalk", title_data, show_source=True
)
else: else:
formatted_title = f"{title_data['title']}" formatted_title = f"{title_data['title']}"
@ -2447,6 +2484,8 @@ def split_content_into_batches(
separator = f"\n\n\n\n" separator = f"\n\n\n\n"
elif format_type == "telegram": elif format_type == "telegram":
separator = f"\n\n" separator = f"\n\n"
elif format_type == "dingtalk":
separator = f"\n---\n\n"
test_content = current_batch + separator test_content = current_batch + separator
if ( if (
@ -2464,6 +2503,8 @@ def split_content_into_batches(
new_header = ( new_header = (
f"\n\n🆕 本次新增热点新闻 (共 {report_data['total_new_count']} 条)\n\n" f"\n\n🆕 本次新增热点新闻 (共 {report_data['total_new_count']} 条)\n\n"
) )
elif format_type == "dingtalk":
new_header = f"\n---\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
test_content = current_batch + new_header test_content = current_batch + new_header
if ( if (
@ -2485,6 +2526,8 @@ def split_content_into_batches(
source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n" source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
elif format_type == "telegram": elif format_type == "telegram":
source_header = f"{source_data['source_name']} ({len(source_data['titles'])} 条):\n\n" source_header = f"{source_data['source_name']} ({len(source_data['titles'])} 条):\n\n"
elif format_type == "dingtalk":
source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
# 构建第一条新增新闻 # 构建第一条新增新闻
first_news_line = "" first_news_line = ""
@ -2501,6 +2544,10 @@ def split_content_into_batches(
formatted_title = format_title_for_platform( formatted_title = format_title_for_platform(
"telegram", title_data_copy, show_source=False "telegram", title_data_copy, show_source=False
) )
elif format_type == "dingtalk":
formatted_title = format_title_for_platform(
"dingtalk", title_data_copy, show_source=False
)
else: else:
formatted_title = f"{title_data_copy['title']}" formatted_title = f"{title_data_copy['title']}"
@ -2538,6 +2585,10 @@ def split_content_into_batches(
formatted_title = format_title_for_platform( formatted_title = format_title_for_platform(
"telegram", title_data_copy, show_source=False "telegram", title_data_copy, show_source=False
) )
elif format_type == "dingtalk":
formatted_title = format_title_for_platform(
"dingtalk", title_data_copy, show_source=False
)
else: else:
formatted_title = f"{title_data_copy['title']}" formatted_title = f"{title_data_copy['title']}"
@ -2564,6 +2615,8 @@ def split_content_into_batches(
failed_header = f"\n\n\n\n⚠️ **数据获取失败的平台:**\n\n" failed_header = f"\n\n\n\n⚠️ **数据获取失败的平台:**\n\n"
elif format_type == "telegram": elif format_type == "telegram":
failed_header = f"\n\n⚠️ 数据获取失败的平台:\n\n" failed_header = f"\n\n⚠️ 数据获取失败的平台:\n\n"
elif format_type == "dingtalk":
failed_header = f"\n---\n\n⚠️ **数据获取失败的平台:**\n\n"
test_content = current_batch + failed_header test_content = current_batch + failed_header
if ( if (
@ -2579,7 +2632,11 @@ def split_content_into_batches(
current_batch_has_content = True current_batch_has_content = True
for i, id_value in enumerate(report_data["failed_ids"], 1): for i, id_value in enumerate(report_data["failed_ids"], 1):
if format_type == "dingtalk":
failed_line = f" • **{id_value}**\n"
else:
failed_line = f"{id_value}\n" failed_line = f"{id_value}\n"
test_content = current_batch + failed_line test_content = current_batch + failed_line
if ( if (
len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
@ -2735,23 +2792,51 @@ def send_to_dingtalk(
proxy_url: Optional[str] = None, proxy_url: Optional[str] = None,
mode: str = "daily", mode: str = "daily",
) -> bool: ) -> bool:
"""发送到钉钉""" """发送到钉钉(支持分批发送)"""
headers = {"Content-Type": "application/json"} headers = {"Content-Type": "application/json"}
proxies = None
if proxy_url:
proxies = {"http": proxy_url, "https": proxy_url}
text_content = render_dingtalk_content(report_data, update_info, mode) # 获取分批内容,使用钉钉专用的批次大小
batches = split_content_into_batches(
report_data,
"dingtalk",
update_info,
max_bytes=CONFIG.get("DINGTALK_BATCH_SIZE", 20000),
mode=mode
)
print(f"钉钉消息分为 {len(batches)} 批次发送 [{report_type}]")
# 逐批发送
for i, batch_content in enumerate(batches, 1):
batch_size = len(batch_content.encode("utf-8"))
print(
f"发送钉钉第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]"
)
# 添加批次标识
if len(batches) > 1:
batch_header = f"**[第 {i}/{len(batches)} 批次]**\n\n"
# 将批次标识插入到适当位置(在标题之后)
if "📊 **热点词汇统计**" in batch_content:
batch_content = batch_content.replace(
"📊 **热点词汇统计**\n\n",
f"📊 **热点词汇统计** {batch_header}\n\n"
)
else:
# 如果没有统计标题,直接在开头添加
batch_content = batch_header + batch_content
payload = { payload = {
"msgtype": "markdown", "msgtype": "markdown",
"markdown": { "markdown": {
"title": f"TrendRadar 热点分析报告 - {report_type}", "title": f"TrendRadar 热点分析报告 - {report_type}",
"text": text_content, "text": batch_content,
}, },
} }
proxies = None
if proxy_url:
proxies = {"http": proxy_url, "https": proxy_url}
try: try:
response = requests.post( response = requests.post(
webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30 webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
@ -2759,18 +2844,27 @@ def send_to_dingtalk(
if response.status_code == 200: if response.status_code == 200:
result = response.json() result = response.json()
if result.get("errcode") == 0: if result.get("errcode") == 0:
print(f"钉钉通知发送成功 [{report_type}]") print(f"钉钉第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
return True # 批次间间隔
if i < len(batches):
time.sleep(CONFIG["BATCH_SEND_INTERVAL"])
else: else:
print(f"钉钉通知发送失败 [{report_type}],错误:{result.get('errmsg')}") print(
f"钉钉第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{result.get('errmsg')}"
)
return False return False
else: else:
print(f"钉钉通知发送失败 [{report_type}],状态码:{response.status_code}") print(
f"钉钉第 {i}/{len(batches)} 批次发送失败 [{report_type}],状态码:{response.status_code}"
)
return False return False
except Exception as e: except Exception as e:
print(f"钉钉通知发送出错 [{report_type}]{e}") print(f"钉钉{i}/{len(batches)} 批次发送出错 [{report_type}]{e}")
return False return False
print(f"钉钉所有 {len(batches)} 批次发送完成 [{report_type}]")
return True
def send_to_wework( def send_to_wework(
webhook_url: str, webhook_url: str,

View File

@ -231,15 +231,20 @@ GitHub 一键 Fork 即可使用,无需编程基础。
> >
> 下一次**新功能**,大概会是 ai 分析功能(大概(●'◡'●) > 下一次**新功能**,大概会是 ai 分析功能(大概(●'◡'●)
### 2025/09/13 - v2.1.2
- 解决钉钉的推送容量限制导致的新闻推送失败问题(采用分批推送)
<details>
<summary><strong>👉 历史更新</strong></summary>
### 2025/09/04 - v2.1.1 ### 2025/09/04 - v2.1.1
- 修复 Docker 在某些架构中无法正常运行的问题 - 修复 Docker 在某些架构中无法正常运行的问题
- 正式发布官方 Docker 镜像 wantcat/trendradar,支持多架构 - 正式发布官方 Docker 镜像 wantcat/trendradar,支持多架构
- 优化 Docker 部署流程,无需本地构建即可快速使用 - 优化 Docker 部署流程,无需本地构建即可快速使用
<details>
<summary><strong>👉 历史更新</strong></summary>
### 2025/08/30 - v2.1.0 ### 2025/08/30 - v2.1.0
**核心改进** **核心改进**

View File

@ -1 +1 @@
2.1.1 2.1.2