mirror of
https://gitee.com/houhuan/TrendRadar.git
synced 2025-12-21 17:17:17 +08:00
v1.1.0
This commit is contained in:
parent
a8f3de0fa4
commit
c0d45e1940
71
main.py
71
main.py
@ -13,7 +13,7 @@ import requests
|
|||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
CONFIG = {
|
CONFIG = {
|
||||||
"VERSION": "1.0.0",
|
"VERSION": "1.1.0",
|
||||||
"VERSION_CHECK_URL": "https://raw.githubusercontent.com/sansan0/TrendRadar/refs/heads/master/version",
|
"VERSION_CHECK_URL": "https://raw.githubusercontent.com/sansan0/TrendRadar/refs/heads/master/version",
|
||||||
"FEISHU_SHOW_VERSION_UPDATE": True, # 控制显示版本更新提示,改成 False 将不接受新版本提示
|
"FEISHU_SHOW_VERSION_UPDATE": True, # 控制显示版本更新提示,改成 False 将不接受新版本提示
|
||||||
"FEISHU_SEPARATOR": "━━━━━━━━━━━━━━━━━━━", # 飞书消息分割线,注意,其它类型的分割线可能会被飞书过滤而不显示
|
"FEISHU_SEPARATOR": "━━━━━━━━━━━━━━━━━━━", # 飞书消息分割线,注意,其它类型的分割线可能会被飞书过滤而不显示
|
||||||
@ -24,6 +24,12 @@ CONFIG = {
|
|||||||
"DEFAULT_PROXY": "http://127.0.0.1:10086",
|
"DEFAULT_PROXY": "http://127.0.0.1:10086",
|
||||||
"CONTINUE_WITHOUT_FEISHU": True, # 控制在没有飞书 webhook URL 时是否继续执行爬虫, 如果 True ,会依然进行爬虫行为,并在 github 上持续的生成爬取的新闻数据
|
"CONTINUE_WITHOUT_FEISHU": True, # 控制在没有飞书 webhook URL 时是否继续执行爬虫, 如果 True ,会依然进行爬虫行为,并在 github 上持续的生成爬取的新闻数据
|
||||||
"FEISHU_WEBHOOK_URL": "", # 飞书机器人的 webhook URL,大概长这样:https://www.feishu.cn/flow/api/trigger-webhook/xxxx, 默认为空,推荐通过GitHub Secrets设置
|
"FEISHU_WEBHOOK_URL": "", # 飞书机器人的 webhook URL,大概长这样:https://www.feishu.cn/flow/api/trigger-webhook/xxxx, 默认为空,推荐通过GitHub Secrets设置
|
||||||
|
# 用于让关注度更高的新闻在更前面显示,这里是权重排序配置,合起来是 1就行(你可以微调,虽然我不建议动嘿嘿)
|
||||||
|
"WEIGHT_CONFIG": {
|
||||||
|
"RANK_WEIGHT": 0.6, # 排名
|
||||||
|
"FREQUENCY_WEIGHT": 0.3, # 频次
|
||||||
|
"HOTNESS_WEIGHT": 0.1, # 热度
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -642,6 +648,62 @@ class DataProcessor:
|
|||||||
class StatisticsCalculator:
|
class StatisticsCalculator:
|
||||||
"""统计计算器"""
|
"""统计计算器"""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def calculate_news_weight(
|
||||||
|
title_data: Dict, rank_threshold: int = CONFIG["RANK_THRESHOLD"]
|
||||||
|
) -> float:
|
||||||
|
"""计算新闻权重,用于排序"""
|
||||||
|
ranks = title_data.get("ranks", [])
|
||||||
|
if not ranks:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
count = title_data.get("count", len(ranks))
|
||||||
|
weight_config = CONFIG["WEIGHT_CONFIG"]
|
||||||
|
|
||||||
|
# 排名权重:Σ(11 - min(rank, 10)) / 出现次数
|
||||||
|
rank_scores = []
|
||||||
|
for rank in ranks:
|
||||||
|
score = 11 - min(rank, 10)
|
||||||
|
rank_scores.append(score)
|
||||||
|
|
||||||
|
rank_weight = sum(rank_scores) / len(ranks) if ranks else 0
|
||||||
|
|
||||||
|
# 频次权重:min(出现次数, 10) × 10
|
||||||
|
frequency_weight = min(count, 10) * 10
|
||||||
|
|
||||||
|
# 热度加成:高排名次数 / 总出现次数 × 100
|
||||||
|
high_rank_count = sum(1 for rank in ranks if rank <= rank_threshold)
|
||||||
|
hotness_ratio = high_rank_count / len(ranks) if ranks else 0
|
||||||
|
hotness_weight = hotness_ratio * 100
|
||||||
|
|
||||||
|
# 综合权重计算
|
||||||
|
total_weight = (
|
||||||
|
rank_weight * weight_config["RANK_WEIGHT"]
|
||||||
|
+ frequency_weight * weight_config["FREQUENCY_WEIGHT"]
|
||||||
|
+ hotness_weight * weight_config["HOTNESS_WEIGHT"]
|
||||||
|
)
|
||||||
|
|
||||||
|
return total_weight
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def sort_titles_by_weight(
|
||||||
|
titles_list: List[Dict], rank_threshold: int = CONFIG["RANK_THRESHOLD"]
|
||||||
|
) -> List[Dict]:
|
||||||
|
"""按权重对新闻标题列表进行排序"""
|
||||||
|
|
||||||
|
def get_sort_key(title_data):
|
||||||
|
weight = StatisticsCalculator.calculate_news_weight(
|
||||||
|
title_data, rank_threshold
|
||||||
|
)
|
||||||
|
ranks = title_data.get("ranks", [])
|
||||||
|
count = title_data.get("count", 1)
|
||||||
|
|
||||||
|
# 主要按权重排序,权重相同时按最高排名排序,再相同时按出现次数排序
|
||||||
|
min_rank = min(ranks) if ranks else 999
|
||||||
|
return (-weight, min_rank, -count)
|
||||||
|
|
||||||
|
return sorted(titles_list, key=get_sort_key)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _matches_word_groups(
|
def _matches_word_groups(
|
||||||
title: str, word_groups: List[Dict], filter_words: List[str]
|
title: str, word_groups: List[Dict], filter_words: List[str]
|
||||||
@ -808,11 +870,16 @@ class StatisticsCalculator:
|
|||||||
for source_id, title_list in data["titles"].items():
|
for source_id, title_list in data["titles"].items():
|
||||||
all_titles.extend(title_list)
|
all_titles.extend(title_list)
|
||||||
|
|
||||||
|
# 按权重排序标题
|
||||||
|
sorted_titles = StatisticsCalculator.sort_titles_by_weight(
|
||||||
|
all_titles, rank_threshold
|
||||||
|
)
|
||||||
|
|
||||||
stats.append(
|
stats.append(
|
||||||
{
|
{
|
||||||
"word": group_key,
|
"word": group_key,
|
||||||
"count": data["count"],
|
"count": data["count"],
|
||||||
"titles": all_titles,
|
"titles": sorted_titles,
|
||||||
"percentage": (
|
"percentage": (
|
||||||
round(data["count"] / total_titles * 100, 2)
|
round(data["count"] / total_titles * 100, 2)
|
||||||
if total_titles > 0
|
if total_titles > 0
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user