From 800f85f12f06e972c414a5a745b50284bd64b309 Mon Sep 17 00:00:00 2001 From: sansan <77180927+sansan0@users.noreply.github.com> Date: Wed, 28 May 2025 19:42:22 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=9B=B4=E6=96=B0=E6=A0=B7=E5=BC=8F?= =?UTF-8?q?=EF=BC=8C=E5=88=86=E7=A6=BB=E9=A3=9E=E4=B9=A6=E6=B6=88=E6=81=AF?= =?UTF-8?q?=E5=92=8Chtml=E6=A0=B7=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 180 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 115 insertions(+), 65 deletions(-) diff --git a/main.py b/main.py index ea41365..83e1c56 100644 --- a/main.py +++ b/main.py @@ -549,14 +549,27 @@ class StatisticsCalculator: if "ranks" in info and info["ranks"]: ranks = info["ranks"] - # 添加带信息的标题 + # 确保排名是有效的 + if not ranks: + ranks = [99] # 使用默认排名 + + # 格式化时间信息 + time_display = StatisticsCalculator._format_time_display( + first_time, last_time + ) + + # 添加带完整信息的标题数据,保存原始数据用于后续格式化 + source_alias = id_to_alias.get(source_id, source_id) word_stats[group_key]["titles"][source_id].append( { "title": title, + "source_alias": source_alias, "first_time": first_time, "last_time": last_time, + "time_display": time_display, "count": count_info, "ranks": ranks, + "rank_threshold": rank_threshold, } ) @@ -566,53 +579,18 @@ class StatisticsCalculator: processed_titles[source_id][title] = True break # 找到第一个匹配的词组后退出循环 - # 转换统计结果 + # 转换统计结果 - 这里不再进行格式化,保留原始数据 stats = [] for group_key, data in word_stats.items(): - titles_with_info = [] + all_titles = [] for source_id, title_list in data["titles"].items(): - source_alias = id_to_alias.get(source_id, source_id) - for title_data in title_list: - title = title_data["title"] - first_time = title_data["first_time"] - last_time = title_data["last_time"] - count_info = title_data["count"] - ranks = title_data.get("ranks", []) - - # 确保排名是有效的 - if not ranks: - ranks = [99] # 使用默认排名 - - # 格式化排名信息 - rank_display = StatisticsCalculator._format_rank_display( - ranks, rank_threshold - ) - - # 格式化时间信息 - time_display = StatisticsCalculator._format_time_display( - first_time, last_time - ) - - # 格式化标题信息 - formatted_title = f"[{source_alias}] {title}" - if rank_display: - formatted_title += f" {rank_display}" - if time_display: - formatted_title += ( - f" - {time_display}" - ) - if count_info > 1: - formatted_title += ( - f" ({count_info}次)" - ) - - titles_with_info.append(formatted_title) + all_titles.extend(title_list) stats.append( { "word": group_key, "count": data["count"], - "titles": titles_with_info, + "titles": all_titles, # 保存原始标题数据,用于后续格式化 "percentage": ( round(data["count"] / total_titles * 100, 2) if total_titles > 0 @@ -627,8 +605,33 @@ class StatisticsCalculator: return stats, total_titles @staticmethod - def _format_rank_display(ranks: List[int], rank_threshold: int = 5) -> str: - """格式化排名显示,前5名使用红色数字""" + def _format_rank_for_html(ranks: List[int], rank_threshold: int = 5) -> str: + """格式化排名显示用于HTML,前5名使用红色粗体""" + if not ranks: + return "" + + # 排序排名并确保不重复 + unique_ranks = sorted(set(ranks)) + min_rank = unique_ranks[0] + max_rank = unique_ranks[-1] + + # 所有排名都使用[],只有前5名显示红色粗体 + if min_rank <= rank_threshold: + if min_rank == max_rank: + # 单一排名且在前5 + return f"[{min_rank}]" + else: + return f"[{min_rank} - {max_rank}]" + else: + # 排名在5名之后,使用普通显示 + if min_rank == max_rank: + return f"[{min_rank}]" + else: + return f"[{min_rank} - {max_rank}]" + + @staticmethod + def _format_rank_for_feishu(ranks: List[int], rank_threshold: int = 5) -> str: + """格式化排名显示用于飞书,前5名使用红色粗体markdown格式""" if not ranks: return "" @@ -644,7 +647,6 @@ class StatisticsCalculator: return f"**[{min_rank}]**" else: return f"**[{min_rank} - {max_rank}]**" - else: # 排名在5名之后,使用普通显示 if min_rank == max_rank: @@ -781,13 +783,39 @@ class ReportGenerator: # 表格内容 for i, stat in enumerate(stats, 1): + # 格式化标题列表用于HTML显示 + formatted_titles = [] + for title_data in stat["titles"]: + title = title_data["title"] + source_alias = title_data["source_alias"] + time_display = title_data["time_display"] + count_info = title_data["count"] + ranks = title_data["ranks"] + rank_threshold = title_data["rank_threshold"] + + # 使用HTML格式化排名 + rank_display = StatisticsCalculator._format_rank_for_html( + ranks, rank_threshold + ) + + # 格式化标题信息 + formatted_title = f"[{source_alias}] {title}" + if rank_display: + formatted_title += f" {rank_display}" + if time_display: + formatted_title += f" - {time_display}" + if count_info > 1: + formatted_title += f" ({count_info}次)" + + formatted_titles.append(formatted_title) + html += f""" {i} {stat['word']} {stat['count']} {stat['percentage']}% - {"
".join(stat['titles'])} + {"
".join(formatted_titles)} """ @@ -869,39 +897,61 @@ class ReportGenerator: if filtered_stats: text_content += "📊 **热点词汇统计**\n\n" + # 获取总数用于序号显示 + total_count = len(filtered_stats) + for i, stat in enumerate(filtered_stats): word = stat["word"] count = stat["count"] - # 关键词加粗,计数和百分比使用不同颜色 + # 构建序号显示,格式为 [当前序号/总数],使用灰色且不加粗 + sequence_display = f"[{i + 1}/{total_count}]" + + # 关键词加粗,计数和百分比使用不同颜色,序号单独显示为灰色 if count >= 10: # 高频词使用红色 - text_content += ( - f"🔥 **{word}** : {count} 条\n\n" - ) + text_content += f"🔥 {sequence_display} **{word}** : {count} 条\n\n" elif count >= 5: # 中频词使用橙色 - text_content += ( - f"📈 **{word}** : {count} 条\n\n" - ) + text_content += f"📈 {sequence_display} **{word}** : {count} 条\n\n" else: # 低频词使用默认颜色 - text_content += f"📌 **{word}** : {count} 条\n\n" + text_content += f"📌 {sequence_display} **{word}** : {count} 条\n\n" - # 添加相关标题 - for j, title in enumerate(stat["titles"], 1): - # 提取来源信息 - if title.startswith("[") and "]" in title: - source_end = title.index("]") + 1 - source = title[:source_end] - rest = title[source_end:].strip() + # 格式化标题列表用于飞书显示 + for j, title_data in enumerate(stat["titles"], 1): + title = title_data["title"] + source_alias = title_data["source_alias"] + time_display = title_data["time_display"] + count_info = title_data["count"] + ranks = title_data["ranks"] + rank_threshold = title_data["rank_threshold"] - # 使用灰色显示来源 - text_content += ( - f" {j}. {source} {rest}\n" - ) - else: - text_content += f" {j}. {title}\n" + # 使用飞书格式化排名 + rank_display = StatisticsCalculator._format_rank_for_feishu( + ranks, rank_threshold + ) + + # 格式化标题信息 + formatted_title = f"[{source_alias}] {title}" + if rank_display: + formatted_title += f" {rank_display}" + if time_display: + formatted_title += f" - {time_display}" + if count_info > 1: + formatted_title += f" ({count_info}次)" + + # 使用灰色显示来源 + text_content += ( + f" {j}. [{source_alias}] {title}" + ) + if rank_display: + text_content += f" {rank_display}" + if time_display: + text_content += f" - {time_display}" + if count_info > 1: + text_content += f" ({count_info}次)" + text_content += "\n" # 在每条新闻后添加额外间隔(除了最后一条) if j < len(stat["titles"]):