feat: 更新样式,分离飞书消息和html样式

This commit is contained in:
sansan 2025-05-28 19:42:22 +08:00 committed by GitHub
parent 1a46317946
commit 800f85f12f

180
main.py
View File

@ -549,14 +549,27 @@ class StatisticsCalculator:
if "ranks" in info and info["ranks"]: if "ranks" in info and info["ranks"]:
ranks = info["ranks"] ranks = info["ranks"]
# 添加带信息的标题 # 确保排名是有效的
if not ranks:
ranks = [99] # 使用默认排名
# 格式化时间信息
time_display = StatisticsCalculator._format_time_display(
first_time, last_time
)
# 添加带完整信息的标题数据,保存原始数据用于后续格式化
source_alias = id_to_alias.get(source_id, source_id)
word_stats[group_key]["titles"][source_id].append( word_stats[group_key]["titles"][source_id].append(
{ {
"title": title, "title": title,
"source_alias": source_alias,
"first_time": first_time, "first_time": first_time,
"last_time": last_time, "last_time": last_time,
"time_display": time_display,
"count": count_info, "count": count_info,
"ranks": ranks, "ranks": ranks,
"rank_threshold": rank_threshold,
} }
) )
@ -566,53 +579,18 @@ class StatisticsCalculator:
processed_titles[source_id][title] = True processed_titles[source_id][title] = True
break # 找到第一个匹配的词组后退出循环 break # 找到第一个匹配的词组后退出循环
# 转换统计结果 # 转换统计结果 - 这里不再进行格式化,保留原始数据
stats = [] stats = []
for group_key, data in word_stats.items(): for group_key, data in word_stats.items():
titles_with_info = [] all_titles = []
for source_id, title_list in data["titles"].items(): for source_id, title_list in data["titles"].items():
source_alias = id_to_alias.get(source_id, source_id) all_titles.extend(title_list)
for title_data in title_list:
title = title_data["title"]
first_time = title_data["first_time"]
last_time = title_data["last_time"]
count_info = title_data["count"]
ranks = title_data.get("ranks", [])
# 确保排名是有效的
if not ranks:
ranks = [99] # 使用默认排名
# 格式化排名信息
rank_display = StatisticsCalculator._format_rank_display(
ranks, rank_threshold
)
# 格式化时间信息
time_display = StatisticsCalculator._format_time_display(
first_time, last_time
)
# 格式化标题信息
formatted_title = f"[{source_alias}] {title}"
if rank_display:
formatted_title += f" {rank_display}"
if time_display:
formatted_title += (
f" <font color='grey'>- {time_display}</font>"
)
if count_info > 1:
formatted_title += (
f" <font color='green'>({count_info}次)</font>"
)
titles_with_info.append(formatted_title)
stats.append( stats.append(
{ {
"word": group_key, "word": group_key,
"count": data["count"], "count": data["count"],
"titles": titles_with_info, "titles": all_titles, # 保存原始标题数据,用于后续格式化
"percentage": ( "percentage": (
round(data["count"] / total_titles * 100, 2) round(data["count"] / total_titles * 100, 2)
if total_titles > 0 if total_titles > 0
@ -627,8 +605,33 @@ class StatisticsCalculator:
return stats, total_titles return stats, total_titles
@staticmethod @staticmethod
def _format_rank_display(ranks: List[int], rank_threshold: int = 5) -> str: def _format_rank_for_html(ranks: List[int], rank_threshold: int = 5) -> str:
"""格式化排名显示前5名使用红色数字""" """格式化排名显示用于HTML前5名使用红色粗体"""
if not ranks:
return ""
# 排序排名并确保不重复
unique_ranks = sorted(set(ranks))
min_rank = unique_ranks[0]
max_rank = unique_ranks[-1]
# 所有排名都使用[]只有前5名显示红色粗体
if min_rank <= rank_threshold:
if min_rank == max_rank:
# 单一排名且在前5
return f"<font color='red'><strong>[{min_rank}]</strong></font>"
else:
return f"<font color='red'><strong>[{min_rank} - {max_rank}]</strong></font>"
else:
# 排名在5名之后使用普通显示
if min_rank == max_rank:
return f"[{min_rank}]"
else:
return f"[{min_rank} - {max_rank}]"
@staticmethod
def _format_rank_for_feishu(ranks: List[int], rank_threshold: int = 5) -> str:
"""格式化排名显示用于飞书前5名使用红色粗体markdown格式"""
if not ranks: if not ranks:
return "" return ""
@ -644,7 +647,6 @@ class StatisticsCalculator:
return f"<font color='red'>**[{min_rank}]**</font>" return f"<font color='red'>**[{min_rank}]**</font>"
else: else:
return f"<font color='red'>**[{min_rank} - {max_rank}]**</font>" return f"<font color='red'>**[{min_rank} - {max_rank}]**</font>"
else: else:
# 排名在5名之后使用普通显示 # 排名在5名之后使用普通显示
if min_rank == max_rank: if min_rank == max_rank:
@ -781,13 +783,39 @@ class ReportGenerator:
# 表格内容 # 表格内容
for i, stat in enumerate(stats, 1): for i, stat in enumerate(stats, 1):
# 格式化标题列表用于HTML显示
formatted_titles = []
for title_data in stat["titles"]:
title = title_data["title"]
source_alias = title_data["source_alias"]
time_display = title_data["time_display"]
count_info = title_data["count"]
ranks = title_data["ranks"]
rank_threshold = title_data["rank_threshold"]
# 使用HTML格式化排名
rank_display = StatisticsCalculator._format_rank_for_html(
ranks, rank_threshold
)
# 格式化标题信息
formatted_title = f"[{source_alias}] {title}"
if rank_display:
formatted_title += f" {rank_display}"
if time_display:
formatted_title += f" <font color='grey'>- {time_display}</font>"
if count_info > 1:
formatted_title += f" <font color='green'>({count_info}次)</font>"
formatted_titles.append(formatted_title)
html += f""" html += f"""
<tr> <tr>
<td>{i}</td> <td>{i}</td>
<td class="word">{stat['word']}</td> <td class="word">{stat['word']}</td>
<td class="count">{stat['count']}</td> <td class="count">{stat['count']}</td>
<td class="percentage">{stat['percentage']}%</td> <td class="percentage">{stat['percentage']}%</td>
<td class="titles">{"<br>".join(stat['titles'])}</td> <td class="titles">{"<br>".join(formatted_titles)}</td>
</tr> </tr>
""" """
@ -869,39 +897,61 @@ class ReportGenerator:
if filtered_stats: if filtered_stats:
text_content += "📊 **热点词汇统计**\n\n" text_content += "📊 **热点词汇统计**\n\n"
# 获取总数用于序号显示
total_count = len(filtered_stats)
for i, stat in enumerate(filtered_stats): for i, stat in enumerate(filtered_stats):
word = stat["word"] word = stat["word"]
count = stat["count"] count = stat["count"]
# 关键词加粗,计数和百分比使用不同颜色 # 构建序号显示,格式为 [当前序号/总数],使用灰色且不加粗
sequence_display = f"<font color='grey'>[{i + 1}/{total_count}]</font>"
# 关键词加粗,计数和百分比使用不同颜色,序号单独显示为灰色
if count >= 10: if count >= 10:
# 高频词使用红色 # 高频词使用红色
text_content += ( text_content += f"🔥 {sequence_display} **{word}** : <font color='red'>{count}</font> 条\n\n"
f"🔥 **{word}** : <font color='red'>{count}</font> 条\n\n"
)
elif count >= 5: elif count >= 5:
# 中频词使用橙色 # 中频词使用橙色
text_content += ( text_content += f"📈 {sequence_display} **{word}** : <font color='orange'>{count}</font> 条\n\n"
f"📈 **{word}** : <font color='orange'>{count}</font> 条\n\n"
)
else: else:
# 低频词使用默认颜色 # 低频词使用默认颜色
text_content += f"📌 **{word}** : {count}\n\n" text_content += f"📌 {sequence_display} **{word}** : {count}\n\n"
# 添加相关标题 # 格式化标题列表用于飞书显示
for j, title in enumerate(stat["titles"], 1): for j, title_data in enumerate(stat["titles"], 1):
# 提取来源信息 title = title_data["title"]
if title.startswith("[") and "]" in title: source_alias = title_data["source_alias"]
source_end = title.index("]") + 1 time_display = title_data["time_display"]
source = title[:source_end] count_info = title_data["count"]
rest = title[source_end:].strip() ranks = title_data["ranks"]
rank_threshold = title_data["rank_threshold"]
# 使用灰色显示来源 # 使用飞书格式化排名
text_content += ( rank_display = StatisticsCalculator._format_rank_for_feishu(
f" {j}. <font color='grey'>{source}</font> {rest}\n" ranks, rank_threshold
) )
else:
text_content += f" {j}. {title}\n" # 格式化标题信息
formatted_title = f"[{source_alias}] {title}"
if rank_display:
formatted_title += f" {rank_display}"
if time_display:
formatted_title += f" <font color='grey'>- {time_display}</font>"
if count_info > 1:
formatted_title += f" <font color='green'>({count_info}次)</font>"
# 使用灰色显示来源
text_content += (
f" {j}. <font color='grey'>[{source_alias}]</font> {title}"
)
if rank_display:
text_content += f" {rank_display}"
if time_display:
text_content += f" <font color='grey'>- {time_display}</font>"
if count_info > 1:
text_content += f" <font color='green'>({count_info}次)</font>"
text_content += "\n"
# 在每条新闻后添加额外间隔(除了最后一条) # 在每条新闻后添加额外间隔(除了最后一条)
if j < len(stat["titles"]): if j < len(stat["titles"]):