# TrendRadar/.github/workflows/crawler.yml
# 2025-12-13 13:44:35 +08:00
#
# 164 lines
# 6.8 KiB
# YAML
# Raw Permalink Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

name: Get Hot News

on:
  schedule:
    # ----------------------------------------------------------------------
    # Trial mode
    # ----------------------------------------------------------------------
    #
    # How it works:
    #   - Each cycle lasts 7 days, after which the workflow auto-stops.
    #   - Running "Check In" resets the cycle: the 7-day countdown restarts,
    #     it is not cumulative.
    #
    # Why this design:
    #   If you forget to check in for 7 days, maybe you don't really need
    #   this feed. A timely pause helps you detach from the stream and
    #   gives your mind some space.
    #
    # Respect shared resources:
    #   GitHub Actions is a shared public resource provided by the platform,
    #   and every run consumes compute. Check-in ensures resources go to
    #   those who truly need them. Thank you for your understanding.
    #
    # For long-term use, deploy the Docker version.
    #
    # ----------------------------------------------------------------------
    #
    # Changing the run time: only modify the first number (0-59), which is
    # the minute of each hour at which the job runs.
    #
    # Examples:
    #   "15 * * * *"    -> minute 15 of every hour
    #   "30 0-14 * * *" -> minute 30 of every hour, 08:00-22:00 Beijing time
    #
    - cron: '33 * * * *'
  workflow_dispatch:
# Runs are grouped per ref name; a newly started run cancels any run of the
# same group that is still in progress.
concurrency:
  cancel-in-progress: true
  group: "crawler-${{ github.ref_name }}"
# Scopes granted to the workflow token: read-only access to repository
# contents, write access to Actions (the expiration check calls
# `gh workflow disable`).
permissions:
  actions: write
  contents: read
jobs:
  # Single job: enforce the 7-day trial window, wait a random amount of time,
  # then install dependencies and run the crawler module.
  crawl:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
          clean: true

      # Fails the job and disables this workflow once the oldest recorded run
      # of crawler.yml is more than 7 days (604800 seconds) old.
      - name: Check Expiration
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          WORKFLOW_FILE="crawler.yml"
          # GITHUB_REPOSITORY is the runner-provided "owner/repo" value; reading
          # it from the environment keeps workflow expressions out of the script.
          API_URL="repos/${GITHUB_REPOSITORY}/actions/workflows/${WORKFLOW_FILE}/runs"

          TOTAL=$(gh api "$API_URL" --jq '.total_count')
          if [ -z "$TOTAL" ] || [ "$TOTAL" -eq 0 ]; then
            echo "No previous runs found, skipping expiration check"
            exit 0
          fi

          # The oldest run is the last entry of the last page (100 runs per page).
          LAST_PAGE=$(( (TOTAL + 99) / 100 ))
          FIRST_RUN_DATE=$(gh api "$API_URL?per_page=100&page=$LAST_PAGE" --jq '.workflow_runs[-1].created_at')

          if [ -n "$FIRST_RUN_DATE" ]; then
            CURRENT_TIMESTAMP=$(date +%s)
            FIRST_RUN_TIMESTAMP=$(date -d "$FIRST_RUN_DATE" +%s)
            DIFF_SECONDS=$((CURRENT_TIMESTAMP - FIRST_RUN_TIMESTAMP))
            LIMIT_SECONDS=604800

            if [ "$DIFF_SECONDS" -gt "$LIMIT_SECONDS" ]; then
              echo "⚠️ 试用期已结束,请运行 'Check In' 签到续期"
              echo "⚠️ Trial expired. Run 'Check In' to renew."
              gh workflow disable "$WORKFLOW_FILE"
              exit 1
            else
              # Whole days remaining, rounded down.
              DAYS_LEFT=$(( (LIMIT_SECONDS - DIFF_SECONDS) / 86400 ))
              echo "✅ 试用期剩余 ${DAYS_LEFT} 天,到期前请运行 'Check In' 签到续期"
              echo "✅ Trial: ${DAYS_LEFT} days left. Run 'Check In' before expiry to renew."
            fi
          fi

      # ------------------------------------------------------------------------
      # TRAFFIC CONTROL
      # ------------------------------------------------------------------------
      # Generates a random delay between 1 and 300 seconds (5 minutes).
      # Critical for load balancing.
      - name: Random Delay (Traffic Control)
        run: |
          echo "🎲 Traffic Control: Generating random delay..."
          DELAY=$(( ( RANDOM % 300 ) + 1 ))
          echo "⏸️ Sleeping for ${DELAY} seconds to spread the load..."
          sleep "${DELAY}s"
          echo "▶️ Delay finished. Starting crawler..."

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is not parsed as the float 3.1.
          python-version: "3.10"
          cache: "pip"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Stop early with a clear message when config/config.yaml is absent.
      - name: Verify required files
        run: |
          if [ ! -f config/config.yaml ]; then
            echo "Error: Config missing"
            exit 1
          fi

      # Runs the crawler; repository secrets are exposed to this step only.
      - name: Run crawler
        env:
          FEISHU_WEBHOOK_URL: ${{ secrets.FEISHU_WEBHOOK_URL }}
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
          DINGTALK_WEBHOOK_URL: ${{ secrets.DINGTALK_WEBHOOK_URL }}
          WEWORK_WEBHOOK_URL: ${{ secrets.WEWORK_WEBHOOK_URL }}
          WEWORK_MSG_TYPE: ${{ secrets.WEWORK_MSG_TYPE }}
          EMAIL_FROM: ${{ secrets.EMAIL_FROM }}
          EMAIL_PASSWORD: ${{ secrets.EMAIL_PASSWORD }}
          EMAIL_TO: ${{ secrets.EMAIL_TO }}
          EMAIL_SMTP_SERVER: ${{ secrets.EMAIL_SMTP_SERVER }}
          EMAIL_SMTP_PORT: ${{ secrets.EMAIL_SMTP_PORT }}
          NTFY_TOPIC: ${{ secrets.NTFY_TOPIC }}
          NTFY_SERVER_URL: ${{ secrets.NTFY_SERVER_URL }}
          NTFY_TOKEN: ${{ secrets.NTFY_TOKEN }}
          BARK_URL: ${{ secrets.BARK_URL }}
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          STORAGE_BACKEND: auto
          LOCAL_RETENTION_DAYS: ${{ secrets.LOCAL_RETENTION_DAYS }}
          REMOTE_RETENTION_DAYS: ${{ secrets.REMOTE_RETENTION_DAYS }}
          S3_BUCKET_NAME: ${{ secrets.S3_BUCKET_NAME }}
          S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
          S3_SECRET_ACCESS_KEY: ${{ secrets.S3_SECRET_ACCESS_KEY }}
          S3_ENDPOINT_URL: ${{ secrets.S3_ENDPOINT_URL }}
          S3_REGION: ${{ secrets.S3_REGION }}
          # Environment values are strings; quoted so no YAML tool reads a boolean.
          GITHUB_ACTIONS: "true"
        run: python -m trendradar