mirror of
https://gitee.com/houhuan/TrendRadar.git
synced 2025-12-21 16:17:17 +08:00
v2.0.1
This commit is contained in:
parent
e500cc7364
commit
8b7f274fd2
73
main.py
73
main.py
@ -825,6 +825,10 @@ class StatisticsCalculator:
|
|||||||
title: str, word_groups: List[Dict], filter_words: List[str]
|
title: str, word_groups: List[Dict], filter_words: List[str]
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""检查标题是否匹配词组规则"""
|
"""检查标题是否匹配词组规则"""
|
||||||
|
# 如果没有配置词组,则匹配所有标题(支持显示全部新闻)
|
||||||
|
if not word_groups:
|
||||||
|
return True
|
||||||
|
|
||||||
title_lower = title.lower()
|
title_lower = title.lower()
|
||||||
|
|
||||||
# 过滤词检查
|
# 过滤词检查
|
||||||
@ -869,6 +873,12 @@ class StatisticsCalculator:
|
|||||||
) -> Tuple[List[Dict], int]:
|
) -> Tuple[List[Dict], int]:
|
||||||
"""统计词频,支持必须词、频率词、过滤词,并标记新增标题"""
|
"""统计词频,支持必须词、频率词、过滤词,并标记新增标题"""
|
||||||
|
|
||||||
|
# 如果没有配置词组,创建一个包含所有新闻的虚拟词组
|
||||||
|
if not word_groups:
|
||||||
|
print("频率词配置为空,将显示所有新闻")
|
||||||
|
word_groups = [{"required": [], "normal": [], "group_key": "全部新闻"}]
|
||||||
|
filter_words = [] # 清空过滤词,显示所有新闻
|
||||||
|
|
||||||
is_first_today = DataProcessor.is_first_crawl_today()
|
is_first_today = DataProcessor.is_first_crawl_today()
|
||||||
|
|
||||||
# 确定处理的数据源和新增标记逻辑
|
# 确定处理的数据源和新增标记逻辑
|
||||||
@ -919,7 +929,8 @@ class StatisticsCalculator:
|
|||||||
results_to_process = results
|
results_to_process = results
|
||||||
all_news_are_new = False
|
all_news_are_new = False
|
||||||
total_input_news = sum(len(titles) for titles in results.values())
|
total_input_news = sum(len(titles) for titles in results.values())
|
||||||
print(f"当日汇总模式:处理 {total_input_news} 条新闻")
|
filter_status = "全部显示" if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻" else "频率词过滤"
|
||||||
|
print(f"当日汇总模式:处理 {total_input_news} 条新闻,模式:{filter_status}")
|
||||||
|
|
||||||
word_stats = {}
|
word_stats = {}
|
||||||
total_titles = 0
|
total_titles = 0
|
||||||
@ -969,27 +980,34 @@ class StatisticsCalculator:
|
|||||||
required_words = group["required"]
|
required_words = group["required"]
|
||||||
normal_words = group["normal"]
|
normal_words = group["normal"]
|
||||||
|
|
||||||
# 再次检查匹配
|
# 如果是"全部新闻"模式,所有标题都匹配第一个(唯一的)词组
|
||||||
if required_words:
|
if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻":
|
||||||
all_required_present = all(
|
group_key = group["group_key"]
|
||||||
req_word.lower() in title_lower
|
word_stats[group_key]["count"] += 1
|
||||||
for req_word in required_words
|
if source_id not in word_stats[group_key]["titles"]:
|
||||||
)
|
word_stats[group_key]["titles"][source_id] = []
|
||||||
if not all_required_present:
|
else:
|
||||||
continue
|
# 原有的匹配逻辑
|
||||||
|
if required_words:
|
||||||
|
all_required_present = all(
|
||||||
|
req_word.lower() in title_lower
|
||||||
|
for req_word in required_words
|
||||||
|
)
|
||||||
|
if not all_required_present:
|
||||||
|
continue
|
||||||
|
|
||||||
if normal_words:
|
if normal_words:
|
||||||
any_normal_present = any(
|
any_normal_present = any(
|
||||||
normal_word.lower() in title_lower
|
normal_word.lower() in title_lower
|
||||||
for normal_word in normal_words
|
for normal_word in normal_words
|
||||||
)
|
)
|
||||||
if not any_normal_present:
|
if not any_normal_present:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
group_key = group["group_key"]
|
group_key = group["group_key"]
|
||||||
word_stats[group_key]["count"] += 1
|
word_stats[group_key]["count"] += 1
|
||||||
if source_id not in word_stats[group_key]["titles"]:
|
if source_id not in word_stats[group_key]["titles"]:
|
||||||
word_stats[group_key]["titles"][source_id] = []
|
word_stats[group_key]["titles"][source_id] = []
|
||||||
|
|
||||||
first_time = ""
|
first_time = ""
|
||||||
last_time = ""
|
last_time = ""
|
||||||
@ -1065,22 +1083,25 @@ class StatisticsCalculator:
|
|||||||
if source_id not in processed_titles:
|
if source_id not in processed_titles:
|
||||||
processed_titles[source_id] = {}
|
processed_titles[source_id] = {}
|
||||||
processed_titles[source_id][title] = True
|
processed_titles[source_id][title] = True
|
||||||
|
|
||||||
break
|
break
|
||||||
|
|
||||||
# 最后统一打印汇总信息
|
# 最后统一打印汇总信息
|
||||||
if mode == "incremental":
|
if mode == "incremental":
|
||||||
if is_first_today:
|
if is_first_today:
|
||||||
total_input_news = sum(len(titles) for titles in results.values())
|
total_input_news = sum(len(titles) for titles in results.values())
|
||||||
|
filter_status = "全部显示" if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻" else "频率词匹配"
|
||||||
print(
|
print(
|
||||||
f"增量模式:当天第一次爬取,{total_input_news} 条新闻中有 {matched_new_count} 条匹配频率词"
|
f"增量模式:当天第一次爬取,{total_input_news} 条新闻中有 {matched_new_count} 条{filter_status}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
if new_titles:
|
if new_titles:
|
||||||
total_new_count = sum(len(titles) for titles in new_titles.values())
|
total_new_count = sum(len(titles) for titles in new_titles.values())
|
||||||
|
filter_status = "全部显示" if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻" else "匹配频率词"
|
||||||
print(
|
print(
|
||||||
f"增量模式:{total_new_count} 条新增新闻中,有 {matched_new_count} 条匹配频率词"
|
f"增量模式:{total_new_count} 条新增新闻中,有 {matched_new_count} 条{filter_status}"
|
||||||
)
|
)
|
||||||
if matched_new_count == 0:
|
if matched_new_count == 0 and len(word_groups) > 1:
|
||||||
print("增量模式:没有新增新闻匹配频率词,将不会发送通知")
|
print("增量模式:没有新增新闻匹配频率词,将不会发送通知")
|
||||||
else:
|
else:
|
||||||
print("增量模式:未检测到新增新闻")
|
print("增量模式:未检测到新增新闻")
|
||||||
@ -1089,13 +1110,15 @@ class StatisticsCalculator:
|
|||||||
len(titles) for titles in results_to_process.values()
|
len(titles) for titles in results_to_process.values()
|
||||||
)
|
)
|
||||||
if is_first_today:
|
if is_first_today:
|
||||||
|
filter_status = "全部显示" if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻" else "频率词匹配"
|
||||||
print(
|
print(
|
||||||
f"当前榜单模式:当天第一次爬取,{total_input_news} 条当前榜单新闻中有 {matched_new_count} 条匹配频率词"
|
f"当前榜单模式:当天第一次爬取,{total_input_news} 条当前榜单新闻中有 {matched_new_count} 条{filter_status}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
matched_count = sum(stat["count"] for stat in word_stats.values())
|
matched_count = sum(stat["count"] for stat in word_stats.values())
|
||||||
|
filter_status = "全部显示" if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻" else "频率词匹配"
|
||||||
print(
|
print(
|
||||||
f"当前榜单模式:{total_input_news} 条当前榜单新闻中有 {matched_count} 条匹配频率词"
|
f"当前榜单模式:{total_input_news} 条当前榜单新闻中有 {matched_count} 条{filter_status}"
|
||||||
)
|
)
|
||||||
|
|
||||||
stats = []
|
stats = []
|
||||||
|
|||||||
37
readme.md
37
readme.md
@ -2,15 +2,12 @@
|
|||||||
|
|
||||||
# 🎯TrendRadar
|
# 🎯TrendRadar
|
||||||
|
|
||||||
**你的专属热点助手 —— 让手机只推送你真正关心的新闻**
|
<strong>🚀 最快一分钟部署的热点助手 —— 告别无效刷屏,只看真正关心的新闻资讯</strong>
|
||||||
|
|
||||||
<strong>🚀 最快一分钟部署完毕!从此告别无效刷屏,只看有价值的信息</strong>
|
|
||||||
|
|
||||||
[](https://github.com/sansan0/TrendRadar/stargazers)
|
[](https://github.com/sansan0/TrendRadar/stargazers)
|
||||||
[](https://github.com/sansan0/TrendRadar/network/members)
|
[](https://github.com/sansan0/TrendRadar/network/members)
|
||||||
[](LICENSE)
|
[](LICENSE)
|
||||||
[](https://www.python.org/)
|
[](https://github.com/sansan0/TrendRadar)
|
||||||
[](https://github.com/sansan0/TrendRadar)
|
|
||||||
|
|
||||||
[](https://work.weixin.qq.com/)
|
[](https://work.weixin.qq.com/)
|
||||||
[](https://telegram.org/)
|
[](https://telegram.org/)
|
||||||
@ -23,11 +20,13 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
||||||
> 如果本项目对你有所帮助,**点个 Star ⭐** 就是对我最大的支持。
|
> 如果本项目帮到了你,**点个 Star ⭐**
|
||||||
>
|
>
|
||||||
> 本项目以轻量,易部署为目标,不进行过多新功能的堆叠,主要处理 issues
|
> 遇到问题提 issues,或【硅基茶水间】公众号留言
|
||||||
>
|
>
|
||||||
> 遇到问题提 issues,或【硅基茶水间】公众号留言。
|
> 详细步骤和使用说明都有,耐心往下翻,很多地方可以点击展开
|
||||||
|
>
|
||||||
|
> 本项目以轻量,易部署为目标,主要处理 issues
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -49,7 +48,7 @@
|
|||||||
- 抖音
|
- 抖音
|
||||||
- 知乎
|
- 知乎
|
||||||
|
|
||||||
> _理论上支持 35 个左右,如果你想增加额外的金融类等相关资讯推送,可看最下方的**自定义监控平台**_
|
> _理论上支持 35 个左右,如果想额外增加,可看最下方的**自定义监控平台**_
|
||||||
|
|
||||||
### **智能推送策略**
|
### **智能推送策略**
|
||||||
|
|
||||||
@ -73,7 +72,7 @@
|
|||||||
|
|
||||||
### **多渠道实时推送**
|
### **多渠道实时推送**
|
||||||
|
|
||||||
支持企业微信、飞书、钉钉、Telegram 主流聊天工具,消息直达手机
|
支持**企业微信**、**飞书**、**钉钉**、**Telegram**,消息直达手机
|
||||||
|
|
||||||
### **零技术门槛部署**
|
### **零技术门槛部署**
|
||||||
|
|
||||||
@ -103,6 +102,20 @@ GitHub 一键 Fork 即可使用,无需编程基础。
|
|||||||
- **小版本更新**:直接在 GitHub 网页编辑器中,用本项目的 `main.py` 代码替换你 fork 仓库中的对应文件
|
- **小版本更新**:直接在 GitHub 网页编辑器中,用本项目的 `main.py` 代码替换你 fork 仓库中的对应文件
|
||||||
- **大版本升级**:从 v1.x 升级到 v2.0 建议删除现有 fork 后重新 fork,这样更省力且避免配置冲突
|
- **大版本升级**:从 v1.x 升级到 v2.0 建议删除现有 fork 后重新 fork,这样更省力且避免配置冲突
|
||||||
|
|
||||||
|
### 2025/07/27 - v2.0.1
|
||||||
|
|
||||||
|
**修复问题**:
|
||||||
|
|
||||||
|
1. docker 的 shell 脚本的换行符为 CRLF 导致的执行异常问题
|
||||||
|
2. frequency_words.txt 为空时,导致新闻发送也为空的逻辑问题
|
||||||
|
- 修复后,当你选择 frequency_words.txt 为空时,将**推送所有新闻**,但受限于消息推送大小限制,请做如下调整
|
||||||
|
- 方案一:关闭手机推送,只选择 GitHub Pages 部署(这是能获得最完整信息的方案,将把所有平台的热点按照你**自定义的热搜算法**进行重新排序)
|
||||||
|
- 方案二:减少推送平台,优先选择**企业微信**或**Telegram**,我为这两个渠道做了分批推送功能(分批推送会影响推送体验,但只有这两个平台的推送容量特别小,所以不得已才做了分批推送,至少能保证获得的信息完整)
|
||||||
|
- 方案三:可与方案二结合,模式选择 current 或 incremental 可有效减少一次性推送的内容
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary><strong>📝 点击查看历史更新</strong></summary>
|
||||||
|
|
||||||
### 2025/07/17 - v2.0.0
|
### 2025/07/17 - v2.0.0
|
||||||
|
|
||||||
**重大重构**:
|
**重大重构**:
|
||||||
@ -114,10 +127,6 @@ GitHub 一键 Fork 即可使用,无需编程基础。
|
|||||||
- `config/config.yaml` - 主配置文件(应用设置、爬虫配置、通知配置、平台配置等)
|
- `config/config.yaml` - 主配置文件(应用设置、爬虫配置、通知配置、平台配置等)
|
||||||
- `config/frequency_words.txt` - 关键词配置(监控词汇设置)
|
- `config/frequency_words.txt` - 关键词配置(监控词汇设置)
|
||||||
|
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary><strong>📝 点击查看历史更新</strong></summary>
|
|
||||||
|
|
||||||
### 2025/07/09 - v1.4.1
|
### 2025/07/09 - v1.4.1
|
||||||
|
|
||||||
**功能新增**:增加增量推送(在 main.py 头部配置 FOCUS_NEW_ONLY),该开关只关心新话题而非持续热度,只在有新内容时才发通知。
|
**功能新增**:增加增量推送(在 main.py 头部配置 FOCUS_NEW_ONLY),该开关只关心新话题而非持续热度,只在有新内容时才发通知。
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user