- """
- for id_value in report_data["failed_ids"]:
- html += f'
- {html_escape(id_value)} ' - html += """ -
diff --git a/.github/ISSUE_TEMPLATE/01-bug-report.yml b/.github/ISSUE_TEMPLATE/01-bug-report.yml index f028116..0c3db59 100644 --- a/.github/ISSUE_TEMPLATE/01-bug-report.yml +++ b/.github/ISSUE_TEMPLATE/01-bug-report.yml @@ -4,8 +4,6 @@ name: 🐛 遇到问题了 description: 程序运行不正常或出现错误 title: "[问题] " labels: ["bug"] -assignees: - - sansan0 body: - type: markdown attributes: diff --git a/.github/ISSUE_TEMPLATE/02-feature-request.yml b/.github/ISSUE_TEMPLATE/02-feature-request.yml index 227625c..77963eb 100644 --- a/.github/ISSUE_TEMPLATE/02-feature-request.yml +++ b/.github/ISSUE_TEMPLATE/02-feature-request.yml @@ -4,8 +4,6 @@ name: 💡 我有个想法 description: 建议新功能或改进现有功能 title: "[建议] " labels: ["enhancement"] -assignees: - - sansan0 body: - type: markdown attributes: diff --git a/.github/ISSUE_TEMPLATE/03-config-help.yml b/.github/ISSUE_TEMPLATE/03-config-help.yml index bdaff53..e86cbaa 100644 --- a/.github/ISSUE_TEMPLATE/03-config-help.yml +++ b/.github/ISSUE_TEMPLATE/03-config-help.yml @@ -4,8 +4,6 @@ name: ⚙️ 设置遇到困难 description: 配置相关的问题或需要帮助 title: "[设置] " labels: ["配置", "帮助"] -assignees: - - sansan0 body: - type: markdown attributes: diff --git a/.github/workflows/clean-crawler.yml b/.github/workflows/clean-crawler.yml new file mode 100644 index 0000000..a84a142 --- /dev/null +++ b/.github/workflows/clean-crawler.yml @@ -0,0 +1,28 @@ +name: Check In + +# ✅ 签到续期:运行此 workflow 可重置 7 天计时,保持 "Get Hot News" 正常运行 +# ✅ Renewal: Run this workflow to reset the 7-day timer and keep "Get Hot News" active +# +# 📌 操作方法 / How to use: +# 1. 点击 "Run workflow" 按钮 / Click "Run workflow" button +# 2. 
每 7 天内至少运行一次 / Run at least once every 7 days + +on: + workflow_dispatch: + +jobs: + del_runs: + runs-on: ubuntu-latest + permissions: + actions: write + contents: read + steps: + - name: Delete all workflow runs + uses: Mattraks/delete-workflow-runs@v2 + with: + token: ${{ github.token }} + repository: ${{ github.repository }} + retain_days: 0 + keep_minimum_runs: 0 + delete_workflow_by_state_pattern: "ALL" + delete_run_by_conclusion_pattern: "ALL" \ No newline at end of file diff --git a/.github/workflows/crawler.yml b/.github/workflows/crawler.yml new file mode 100644 index 0000000..733de22 --- /dev/null +++ b/.github/workflows/crawler.yml @@ -0,0 +1,163 @@ +name: Get Hot News + +on: + schedule: + # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + # ⚠️ 试用版说明 / Trial Mode + # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + # + # 🔄 运行机制 / How it works: + # - 每个周期为 7 天,届时自动停止 + # - 运行 "Check In" 会重置周期(重新开始 7 天倒计时,而非累加) + # - Each cycle is 7 days, then auto-stops + # - "Check In" resets the cycle (restarts 7-day countdown, not cumulative) + # + # 💡 设计初衷 / Why this design: + # 如果 7 天都忘了签到,或许这些资讯对你来说并非刚需 + # 适时的暂停,能帮你从信息流中抽离,给大脑留出喘息的空间 + # If you forget for 7 days, maybe you don't really need it + # A timely pause helps you detach from the stream and gives your mind space + # + # 🙏 珍惜资源 / Respect shared resources: + # GitHub Actions 是平台提供的公共资源,每次运行都会消耗算力 + # 签到机制确保资源分配给真正需要的用户,感谢你的理解与配合 + # GitHub Actions is a shared public resource provided by the platform + # Check-in ensures resources go to those who truly need it — thank you + # + # 🚀 长期使用请部署 Docker 版本 / For long-term use, deploy Docker version + # + # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + # + # 📝 修改运行时间:只改第一个数字(0-59),表示每小时第几分钟运行 + # 📝 Change time: Only modify the first number (0-59) = minute of each hour + # + # 示例 / Examples: + # "15 * * * *" → 每小时第15分钟 / minute 15 every hour + # "30 0-14 * * *" → 北京时间 8:00-22:00 
每小时第30分钟 / Beijing 8am-10pm + # + - cron: "33 * * * *" + + workflow_dispatch: + +concurrency: + group: crawler-${{ github.ref_name }} + cancel-in-progress: true + +permissions: + contents: read + actions: write + +jobs: + crawl: + runs-on: ubuntu-latest + timeout-minutes: 15 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + clean: true + + - name: Check Expiration + env: + GH_TOKEN: ${{ github.token }} + run: | + WORKFLOW_FILE="crawler.yml" + API_URL="repos/${{ github.repository }}/actions/workflows/$WORKFLOW_FILE/runs" + + TOTAL=$(gh api "$API_URL" --jq '.total_count') + if [ -z "$TOTAL" ] || [ "$TOTAL" -eq 0 ]; then + echo "No previous runs found, skipping expiration check" + exit 0 + fi + + LAST_PAGE=$(( (TOTAL + 99) / 100 )) + FIRST_RUN_DATE=$(gh api "$API_URL?per_page=100&page=$LAST_PAGE" --jq '.workflow_runs[-1].created_at') + + if [ -n "$FIRST_RUN_DATE" ]; then + CURRENT_TIMESTAMP=$(date +%s) + FIRST_RUN_TIMESTAMP=$(date -d "$FIRST_RUN_DATE" +%s) + DIFF_SECONDS=$((CURRENT_TIMESTAMP - FIRST_RUN_TIMESTAMP)) + LIMIT_SECONDS=604800 + + if [ $DIFF_SECONDS -gt $LIMIT_SECONDS ]; then + echo "⚠️ 试用期已结束,请运行 'Check In' 签到续期" + echo "⚠️ Trial expired. Run 'Check In' to renew." + gh workflow disable "$WORKFLOW_FILE" + exit 1 + else + DAYS_LEFT=$(( (LIMIT_SECONDS - DIFF_SECONDS) / 86400 )) + echo "✅ 试用期剩余 ${DAYS_LEFT} 天,到期前请运行 'Check In' 签到续期" + echo "✅ Trial: ${DAYS_LEFT} days left. Run 'Check In' before expiry to renew." + fi + fi + + + # -------------------------------------------------------------------------------- + # 🚦 TRAFFIC CONTROL / 流量控制 + # -------------------------------------------------------------------------------- + # EN: Generates a random delay between 1 and 300 seconds (5 minutes). + # Critical for load balancing. + # + # CN: 生成 1 到 300 秒(5分钟)之间的随机延迟。 + # 这对负载均衡至关重要。 + - name: Random Delay (Traffic Control) + if: success() + run: | + echo "🎲 Traffic Control: Generating random delay..." 
+ DELAY=$(( ( RANDOM % 300 ) + 1 )) + echo "⏸️ Sleeping for ${DELAY} seconds to spread the load..." + sleep ${DELAY}s + echo "▶️ Delay finished. Starting crawler..." + + - name: Set up Python + if: success() + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" + + - name: Install dependencies + if: success() + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Verify required files + if: success() + run: | + if [ ! -f config/config.yaml ]; then + echo "Error: Config missing" + exit 1 + fi + + - name: Run crawler + if: success() + env: + FEISHU_WEBHOOK_URL: ${{ secrets.FEISHU_WEBHOOK_URL }} + TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }} + TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }} + DINGTALK_WEBHOOK_URL: ${{ secrets.DINGTALK_WEBHOOK_URL }} + WEWORK_WEBHOOK_URL: ${{ secrets.WEWORK_WEBHOOK_URL }} + WEWORK_MSG_TYPE: ${{ secrets.WEWORK_MSG_TYPE }} + EMAIL_FROM: ${{ secrets.EMAIL_FROM }} + EMAIL_PASSWORD: ${{ secrets.EMAIL_PASSWORD }} + EMAIL_TO: ${{ secrets.EMAIL_TO }} + EMAIL_SMTP_SERVER: ${{ secrets.EMAIL_SMTP_SERVER }} + EMAIL_SMTP_PORT: ${{ secrets.EMAIL_SMTP_PORT }} + NTFY_TOPIC: ${{ secrets.NTFY_TOPIC }} + NTFY_SERVER_URL: ${{ secrets.NTFY_SERVER_URL }} + NTFY_TOKEN: ${{ secrets.NTFY_TOKEN }} + BARK_URL: ${{ secrets.BARK_URL }} + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + STORAGE_BACKEND: auto + LOCAL_RETENTION_DAYS: ${{ secrets.LOCAL_RETENTION_DAYS }} + REMOTE_RETENTION_DAYS: ${{ secrets.REMOTE_RETENTION_DAYS }} + S3_BUCKET_NAME: ${{ secrets.S3_BUCKET_NAME }} + S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }} + S3_SECRET_ACCESS_KEY: ${{ secrets.S3_SECRET_ACCESS_KEY }} + S3_ENDPOINT_URL: ${{ secrets.S3_ENDPOINT_URL }} + S3_REGION: ${{ secrets.S3_REGION }} + GITHUB_ACTIONS: true + run: python -m trendradar diff --git a/README-EN.md b/README-EN.md index 084dda9..36be381 100644 --- a/README-EN.md +++ b/README-EN.md @@ -1,6 +1,6 @@
@@ -16,8 +16,8 @@
[](https://github.com/sansan0/TrendRadar/stargazers)
[](https://github.com/sansan0/TrendRadar/network/members)
[](LICENSE)
-[](https://github.com/sansan0/TrendRadar)
-[](https://github.com/sansan0/TrendRadar)
+[](https://github.com/sansan0/TrendRadar)
+[](https://github.com/sansan0/TrendRadar)
[](https://work.weixin.qq.com/)
[](https://weixin.qq.com/)
@@ -48,62 +48,61 @@
@@ -16,8 +16,8 @@
[](https://github.com/sansan0/TrendRadar/stargazers)
[](https://github.com/sansan0/TrendRadar/network/members)
[](LICENSE)
-[](https://github.com/sansan0/TrendRadar)
-[](https://github.com/sansan0/TrendRadar)
+[](https://github.com/sansan0/TrendRadar)
+[](https://github.com/sansan0/TrendRadar)
[](https://work.weixin.qq.com/)
[](https://weixin.qq.com/)
@@ -48,62 +48,61 @@
- {title_data['time_display']}"
- if title_data["count"] > 1:
- result += f" ({title_data['count']}次)"
-
- return result
-
- elif platform == "ntfy":
- if link_url:
- formatted_title = f"[{cleaned_title}]({link_url})"
- else:
- formatted_title = cleaned_title
-
- title_prefix = "🆕 " if title_data.get("is_new") else ""
-
- if show_source:
- result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
- else:
- result = f"{title_prefix}{formatted_title}"
-
- if rank_display:
- result += f" {rank_display}"
- if title_data["time_display"]:
- result += f" `- {title_data['time_display']}`"
- if title_data["count"] > 1:
- result += f" `({title_data['count']}次)`"
-
- return result
-
- elif platform == "slack":
- # Slack 使用 mrkdwn 格式
- if link_url:
- # Slack 链接格式: - {title_data['time_display']}"
+ if title_data["count"] > 1:
+ result += f" ({title_data['count']}次)"
+
+ return result
+
+ elif platform == "ntfy":
+ if link_url:
+ formatted_title = f"[{cleaned_title}]({link_url})"
+ else:
+ formatted_title = cleaned_title
+
+ title_prefix = "🆕 " if title_data.get("is_new") else ""
+
+ if show_source:
+ result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
+ else:
+ result = f"{title_prefix}{formatted_title}"
+
+ if rank_display:
+ result += f" {rank_display}"
+ if title_data["time_display"]:
+ result += f" `- {title_data['time_display']}`"
+ if title_data["count"] > 1:
+ result += f" `({title_data['count']}次)`"
+
+ return result
+
+ elif platform == "slack":
+ # Slack 使用 mrkdwn 格式
+ if link_url:
+ # Slack 链接格式: {report_data}"
+
+ # 写入文件
+ with open(file_path, "w", encoding="utf-8") as f:
+ f.write(html_content)
+
+ # 如果是每日汇总且启用 index 复制
+ if is_daily_summary and enable_index_copy:
+ # 生成到根目录(供 GitHub Pages 访问)
+ root_index_path = Path("index.html")
+ with open(root_index_path, "w", encoding="utf-8") as f:
+ f.write(html_content)
+
+ # 同时生成到 output 目录(供 Docker Volume 挂载访问)
+ output_index_path = Path(output_dir) / "index.html"
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
+ with open(output_index_path, "w", encoding="utf-8") as f:
+ f.write(html_content)
+
+ return file_path
diff --git a/trendradar/report/helpers.py b/trendradar/report/helpers.py
new file mode 100644
index 0000000..1142eaa
--- /dev/null
+++ b/trendradar/report/helpers.py
@@ -0,0 +1,125 @@
+# coding=utf-8
+"""
+报告辅助函数模块
+
+提供报告生成相关的通用辅助函数
+"""
+
+import re
+from typing import List
+
+
def clean_title(title: str) -> str:
    """Normalize the whitespace in a title.

    Cleaning rules:
        - Line breaks (newline, carriage return) become spaces.
        - Every run of consecutive whitespace is collapsed to one space.
        - Leading and trailing whitespace is removed.

    Args:
        title: Raw title. Non-string values are converted with ``str()``.

    Returns:
        The cleaned title string.
    """
    if not isinstance(title, str):
        title = str(title)
    # The whitespace class already matches newline and carriage return, so a
    # single substitution covers both the line-break and the collapse rules.
    return re.sub(r"\s+", " ", title).strip()
+
+
def html_escape(text: str) -> str:
    """Escape HTML special characters.

    Replacements, applied in this order:
        - ``&`` -> ``&amp;``
        - ``<`` -> ``&lt;``
        - ``>`` -> ``&gt;``
        - ``"`` -> ``&quot;``
        - ``'`` -> ``&#x27;``

    Args:
        text: Raw text. Non-string values are converted with ``str()``.

    Returns:
        The escaped text, safe to embed in HTML content or attribute values.
    """
    if not isinstance(text, str):
        text = str(text)

    # "&" must be handled first; otherwise the ampersands introduced by the
    # later replacements would themselves be escaped a second time.
    return (
        text.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace('"', "&quot;")
        .replace("'", "&#x27;")
    )
+
+
def format_rank_display(ranks: List[int], rank_threshold: int, format_type: str) -> str:
    """Format a list of ranks for display on a given platform.

    The result is ``[min]`` when all ranks are equal, otherwise
    ``[min - max]``. When the best (smallest) rank is less than or equal to
    ``rank_threshold`` the text is wrapped in the platform's highlight
    markers.

    Args:
        ranks: Ranks to summarize; duplicates are allowed.
        rank_threshold: Highlight threshold; a best rank at or below this
            value is highlighted.
        format_type: Target platform. Recognized values are ``"html"``,
            ``"feishu"``, ``"dingtalk"``, ``"wework"``, ``"telegram"`` and
            ``"slack"``; anything else uses the default markdown bold.

    Returns:
        The formatted rank string, e.g. ``"[1]"`` or ``"[1 - 5]"``, or an
        empty string when ``ranks`` is empty.
    """
    if not ranks:
        return ""

    best_rank = min(ranks)
    worst_rank = max(ranks)

    # Highlight (start, end) markers per platform.
    # NOTE(review): the "html" and "telegram" markers were empty strings in
    # the reviewed extract because markup had been stripped from it; the tags
    # below are reconstructed - confirm against the upstream project. The
    # "feishu" markers may have lost a wrapper tag the same way - verify.
    highlight_markers = {
        "html": ("<font color='red'><strong>", "</strong></font>"),
        "feishu": ("**", "**"),
        "dingtalk": ("**", "**"),
        "wework": ("**", "**"),
        "telegram": ("<b>", "</b>"),
        "slack": ("*", "*"),
    }
    # Unknown platforms fall back to markdown bold.
    highlight_start, highlight_end = highlight_markers.get(format_type, ("**", "**"))

    if best_rank == worst_rank:
        rank_text = f"[{best_rank}]"
    else:
        rank_text = f"[{best_rank} - {worst_rank}]"

    if best_rank <= rank_threshold:
        return f"{highlight_start}{rank_text}{highlight_end}"
    return rank_text
diff --git a/trendradar/report/html.py b/trendradar/report/html.py
new file mode 100644
index 0000000..e69216e
--- /dev/null
+++ b/trendradar/report/html.py
@@ -0,0 +1,1050 @@
+# coding=utf-8
+"""
+HTML 报告渲染模块
+
+提供 HTML 格式的热点新闻报告生成功能
+"""
+
+from datetime import datetime
+from typing import Dict, Optional, Callable
+
+from trendradar.report.helpers import html_escape
+
+
+def render_html_content(
+ report_data: Dict,
+ total_titles: int,
+ is_daily_summary: bool = False,
+ mode: str = "daily",
+ update_info: Optional[Dict] = None,
+ *,
+ reverse_content_order: bool = False,
+ get_time_func: Optional[Callable[[], datetime]] = None,
+) -> str:
+ """渲染HTML内容
+
+ Args:
+ report_data: 报告数据字典,包含 stats, new_titles, failed_ids, total_new_count
+ total_titles: 新闻总数
+ is_daily_summary: 是否为当日汇总
+ mode: 报告模式 ("daily", "current", "incremental")
+ update_info: 更新信息(可选)
+ reverse_content_order: 是否反转内容顺序(新增热点在前)
+ get_time_func: 获取当前时间的函数(可选,默认使用 datetime.now)
+
+ Returns:
+ 渲染后的 HTML 字符串
+ """
+ html = """
+
+
+
+
+
+