diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 3ba3e8d..530490f 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -1,17 +1,33 @@ -name: Build and Push Multi-Arch Docker Images +name: Build and Push Docker Images on: push: - tags: ["v*"] + tags: + - "v*" # 主项目版本 + - "mcp-v*" # MCP 版本 workflow_dispatch: + inputs: + image: + description: "选择要构建的镜像" + required: true + default: "all" + type: choice + options: + - all + - crawler + - mcp env: REGISTRY: docker.io - IMAGE_NAME: wantcat/trendradar jobs: - build: + build-crawler: runs-on: ubuntu-latest + # 条件:v* 标签(排除 mcp-v*)或手动触发选择 all/crawler + if: | + (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && !startsWith(github.ref, 'refs/tags/mcp-v')) || + (github.event_name == 'workflow_dispatch' && (github.event.inputs.image == 'all' || github.event.inputs.image == 'crawler')) + steps: - name: Checkout uses: actions/checkout@v4 @@ -35,12 +51,11 @@ jobs: id: meta uses: docker/metadata-action@v5 with: - images: ${{ env.IMAGE_NAME }} + images: wantcat/trendradar tags: | - type=ref,event=branch type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} - type=raw,value=latest,enable={{is_default_branch}} + type=raw,value=latest - name: Build and push uses: docker/build-push-action@v5 @@ -55,5 +70,65 @@ jobs: labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max - build-args: | - BUILDKIT_INLINE_CACHE=1 + + build-mcp: + runs-on: ubuntu-latest + # 条件:mcp-v* 标签 或手动触发选择 all/mcp + if: | + (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/mcp-v')) || + (github.event_name == 'workflow_dispatch' && (github.event.inputs.image == 'all' || github.event.inputs.image == 'mcp')) + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: | + 
network=host + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Extract version from tag + id: version + run: | + if [[ "${{ github.ref }}" == refs/tags/mcp-v* ]]; then + VERSION="${GITHUB_REF#refs/tags/mcp-v}" + echo "version=${VERSION}" >> $GITHUB_OUTPUT + echo "major_minor=$(echo $VERSION | cut -d. -f1,2)" >> $GITHUB_OUTPUT + else + echo "version=latest" >> $GITHUB_OUTPUT + echo "major_minor=latest" >> $GITHUB_OUTPUT + fi + + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: wantcat/trendradar-mcp + tags: | + type=raw,value=${{ steps.version.outputs.version }} + type=raw,value=${{ steps.version.outputs.major_minor }} + type=raw,value=latest + + - name: Build and push + uses: docker/build-push-action@v5 + env: + BUILDKIT_PROGRESS: plain + with: + context: . + file: ./docker/Dockerfile.mcp + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/README-EN.md b/README-EN.md index bd06fd2..a2c148b 100644 --- a/README-EN.md +++ b/README-EN.md @@ -4,7 +4,7 @@ TrendRadar Banner -🚀 Deploy in 30 seconds — Your Smart Trending News Assistant +🚀 Deploy in 30 seconds — Say goodbye to endless scrolling, only see the news you truly care about sansan0%2FTrendRadar | Trendshift @@ -14,7 +14,7 @@ [![GitHub Stars](https://img.shields.io/github/stars/sansan0/TrendRadar?style=flat-square&logo=github&color=yellow)](https://github.com/sansan0/TrendRadar/stargazers) [![GitHub Forks](https://img.shields.io/github/forks/sansan0/TrendRadar?style=flat-square&logo=github&color=blue)](https://github.com/sansan0/TrendRadar/network/members) [![License](https://img.shields.io/badge/license-GPL--3.0-blue.svg?style=flat-square)](LICENSE) 
-[![Version](https://img.shields.io/badge/version-v3.4.1-blue.svg)](https://github.com/sansan0/TrendRadar) +[![Version](https://img.shields.io/badge/version-v3.5.0-blue.svg)](https://github.com/sansan0/TrendRadar) [![MCP](https://img.shields.io/badge/MCP-v1.0.3-green.svg)](https://github.com/sansan0/TrendRadar) [![WeWork](https://img.shields.io/badge/WeWork-Notification-00D4AA?style=flat-square)](https://work.weixin.qq.com/) @@ -44,6 +44,34 @@ > This project is designed to be lightweight and easy to deploy +
+⚠️ Click to expand: Fork Guide — Docs, Resource Limits & Deployment Options +
+ +**📄 Documentation Version:** + +If you use this project via **Fork**, you might be viewing outdated documentation. Fork copies the documentation version at that time, but the original project may have been updated. + +**👉 [Click to View Latest Official Documentation](https://github.com/sansan0/TrendRadar?tab=readme-ov-file)** + +**How to Tell?** Check the repository address at the top of the page: +- `github.com/your-username/TrendRadar` ← Your forked version +- `github.com/sansan0/TrendRadar` ← Latest official version + +--- + +**🛡️ Resource Limits & Safety Tips:** + +GitHub provides limited Actions resources per account. To avoid being flagged for abuse and risking account suspension: + +- **Platform Count**: Keep it around **10 platforms** — more platforms consume more resources +- **Run Frequency**: Minimum interval of **30 minutes** — more frequent runs are unnecessary +- **Fair Use**: GitHub Actions is designed for lightweight scheduled tasks, not high-frequency crawlers + +💡 **Want more freedom?** Try [🐳 Docker Deployment](#6-docker-deployment) on your own server with no restrictions. + +
+
## 📑 Quick Navigation @@ -156,23 +184,25 @@ Default monitoring of 11 mainstream platforms, with support for adding custom pl > > For detailed comparison and configuration, see [Configuration Guide - Push Mode Details](#3-push-mode-details) -**Additional Feature - Push Time Window Control** (Optional): +**Additional Features** (Optional): -- Set push time range (e.g., 09:00-18:00), push only within specified time -- Configure multiple pushes within window or once per day -- Avoid notifications during non-work hours +| Feature | Description | Default | +|---------|-------------|---------| +| **Push Time Window Control** | Set push time range (e.g., 09:00-18:00) to avoid non-work hours notifications | Disabled | +| **Content Order Configuration** | Adjust display order of "Trending Keywords Stats" and "New Trending News" (v3.5.0 new) | Stats first | -> 💡 This feature is disabled by default, see [Quick Start](#-quick-start) for configuration +> 💡 For detailed configuration, see [Configuration Guide - Report Configuration](#7-report-configuration) and [Configuration Guide - Push Window](#8-push-window-configuration) ### **Precise Content Filtering** Set personal keywords (e.g., AI, BYD, Education Policy) to receive only relevant trending news, filtering out noise. -**Basic Syntax** (4 types): +**Basic Syntax** (5 types): - Normal words: Basic matching - Required words `+`: Narrow scope - Filter words `!`: Exclude noise - Count limit `@`: Control display count (v3.2.0 new) +- Global filter `[GLOBAL_FILTER]`: Globally exclude specified content (v3.5.0 new) **Advanced Features** (v3.2.0 new): - 🔢 **Keyword Sorting Control**: Sort by popularity or config order @@ -214,6 +244,14 @@ No longer controlled by platform algorithms, TrendRadar reorganizes all trending Supports **WeWork** (+ WeChat push solution), **Feishu**, **DingTalk**, **Telegram**, **Email**, **ntfy**, **Bark**, **Slack** — messages delivered directly to phone and email. 
+**📌 Multi-Account Push Notes (v3.5.0 New Feature):** + +- ✅ **Multi-Account Configuration Support**: All push channels (Feishu, DingTalk, WeWork, Telegram, ntfy, Bark, Slack) support configuring multiple accounts +- ✅ **Configuration Method**: Use English semicolon `;` to separate multiple account values +- ✅ **Example**: Set `FEISHU_WEBHOOK_URL` Secret value to `https://webhook1;https://webhook2` +- ⚠️ **Paired Configuration**: Telegram and ntfy require paired parameter quantities to match (e.g., token and chat_id both have 2 values) +- ⚠️ **Quantity Limit**: Default maximum 3 accounts per channel, exceeded values will be truncated + ### **Multi-Platform Support** - **GitHub Pages**: Auto-generate beautiful web reports, PC/mobile adapted - **Docker Deployment**: Supports multi-architecture containerized operation @@ -272,6 +310,62 @@ Transform from "algorithm recommendation captivity" to "actively getting the inf - **Major Version Upgrade**: Upgrading from v1.x to v2.y, recommend deleting existing fork and re-forking to save effort and avoid config conflicts +### 2025/12/03 - v3.5.0 + +**🎉 Core Feature Enhancements** + +1. **Multi-Account Push Support** + - All push channels (Feishu, DingTalk, WeWork, Telegram, ntfy, Bark, Slack) support multiple account configuration + - Use semicolon `;` to separate multiple accounts, e.g., `FEISHU_WEBHOOK_URL=url1;url2` + - Automatic validation for paired configurations (e.g., Telegram's token and chat_id) + +2. **Configurable Push Content Order** + - Added `reverse_content_order` configuration option + - Customize display order of trending keywords stats and new trending news + +3. **Global Filter Keywords** + - Added `[GLOBAL_FILTER]` region marker for filtering unwanted content globally + - Use cases: Filter ads, marketing, low-quality content, etc. 
+ +**🐳 Docker Dual-Path HTML Generation Optimization** + +- **Bug Fix**: Resolved issue where `index.html` could not sync to host in Docker environment +- **Dual-Path Generation**: Daily summary HTML is generated to two locations simultaneously + - `index.html` (project root): For GitHub Pages access + - `output/index.html`: Accessible on host via Docker Volume mount +- **Compatibility**: Ensures web reports are accessible in Docker, GitHub Actions, and local environments + +**🐳 Docker MCP Image Support** + +- Added independent MCP service image `wantcat/trendradar-mcp` +- Supports Docker deployment of AI analysis features via HTTP interface (port 3333) +- Dual-container architecture: News push service and MCP service run independently, can be scaled and restarted separately +- See [Docker Deployment - MCP Service](#6-docker-deployment) for details + +**🌐 Web Server Support** + +- Added built-in web server for browser access to generated reports +- Control via `manage.py` commands: `docker exec -it trend-radar python manage.py start_webserver` +- Access URL: `http://localhost:8080` (port configurable) +- Security features: Static file service, directory restriction, localhost binding +- Supports both auto-start and manual control modes + +**📖 Documentation Optimization** + +- Added [Report Configuration](#7-report-configuration) section: report-related parameter details +- Added [Push Window Configuration](#8-push-window-configuration) section: push_window configuration tutorial +- Added [Execution Frequency Configuration](#9-execution-frequency-configuration) section: Cron expression explanation and common examples +- Added [Multi-Account Push Configuration](#10-multiple-account-push-configuration) section: multi-account push configuration details +- Optimized all configuration sections: Unified "Configuration Location" instructions +- Simplified Quick Start configuration: Three core files at a glance +- Optimized [Docker Deployment](#6-docker-deployment) section: 
Added image description, recommended git clone deployment, reorganized deployment methods + +**🔧 Upgrade Instructions**: +- **GitHub Fork Users**: Update `main.py`, `config/config.yaml` (Added multi-account push support, existing single-account configuration unaffected) +- **Docker Users**: Update `.env`, `docker-compose.yml` or set environment variables `REVERSE_CONTENT_ORDER`, `MAX_ACCOUNTS_PER_CHANNEL` +- **Multi-Account Push**: New feature, disabled by default, existing single-account configuration unaffected + + ### 2025/11/28 - v3.4.1 **🔧 Format Optimization** @@ -705,6 +799,24 @@ frequency_words.txt file added **required word** feature, using + sign - ⚠️ **DO NOT Create Custom Names**: The Secret Name must **strictly use** the names listed below (e.g., `WEWORK_WEBHOOK_URL`, `FEISHU_WEBHOOK_URL`, etc.). Do not modify or create new names arbitrarily, or the system will not recognize them - 💡 **Can Configure Multiple Platforms**: The system will send notifications to all configured platforms + **📌 Multi-Account Push Notes (v3.5.0 New Feature):** + + - ✅ **Multi-Account Configuration Support**: All push channels (Feishu, DingTalk, WeWork, Telegram, ntfy, Bark, Slack) support configuring multiple accounts + - ✅ **Configuration Method**: Use English semicolon `;` to separate multiple account values + - ✅ **Example**: Set `FEISHU_WEBHOOK_URL` Secret value to `https://webhook1;https://webhook2` + - ⚠️ **Paired Configuration**: Telegram and ntfy require paired parameter quantities to match (e.g., token and chat_id both have 2 values) + - ⚠️ **Quantity Limit**: Default maximum 3 accounts per channel, exceeded values will be truncated + + **Multi-Account Configuration Examples**: + + | Name | Secret (Value) Example | + |------|------------------------| + | `FEISHU_WEBHOOK_URL` | `https://webhook1;https://webhook2;https://webhook3` | + | `TELEGRAM_BOT_TOKEN` | `token1;token2` | + | `TELEGRAM_CHAT_ID` | `chatid1;chatid2` | + | `NTFY_TOPIC` | `topic1;topic2` | + | 
`NTFY_TOKEN` | `;token2` (1st has no token, use empty string as placeholder) | + **Configuration Example:** GitHub Secrets Configuration Example @@ -1316,15 +1428,21 @@ frequency_words.txt file added **required word** feature, using + sign 3. Click **"Run workflow"** button on the right to run 4. Wait about 1 minute, messages will be pushed to your configured platform + > ⏱️ **Testing Tips**: + > - Don't test too frequently to avoid triggering GitHub Actions limits + > - After clicking Run workflow, you need to **refresh the browser page** to see the new run record + 4. **Configuration Notes (Optional)**: - > 💡 Default configuration works normally, only adjust if you need personalization + > 💡 **Default configuration works normally**, only adjust if you need personalization, understand these three files: - - **Push Settings**: Configure push mode and notification options in [config/config.yaml](config/config.yaml) → [Push Mode Details](#3-push-mode-details) - - **Keyword Settings**: Add your interested keywords in [config/frequency_words.txt](config/frequency_words.txt) → [Keyword Configuration Tutorial](#2-keyword-configuration) - - **Push Frequency Adjustment**: In [.github/workflows/crawler.yml](.github/workflows/crawler.yml) adjust carefully, don't be greedy + | File | Purpose | + |------|---------| + | `config/config.yaml` | Main config file: push mode, time window, platform list, hotspot weights, etc. | + | `config/frequency_words.txt` | Keyword file: set your interested keywords, filter push content | + | `.github/workflows/crawler.yml` | Execution frequency: control how often to run (⚠️ modify carefully) | - **Note**: Suggest only adjusting explicitly documented config items, other options mainly for author's development testing + 👉 **Detailed Configuration Tutorial**: [Configuration Guide](#configuration-guide) 5. **🎉 Deployment Success! 
Share Your Experience** @@ -1365,6 +1483,8 @@ frequency_words.txt file added **required word** feature, using + sign 👉 Click to expand: Custom Monitoring Platforms
+**Configuration Location:** `platforms` section in `config/config.yaml` + This project's news data comes from [newsnow](https://github.com/ourongxing/newsnow). You can click the [website](https://newsnow.busiyi.world/), click [More], to see if there are platforms you want. For specific additions, visit [project source code](https://github.com/ourongxing/newsnow/tree/main/server/sources), based on the file names there, modify the `platforms` configuration in `config/config.yaml` file: @@ -1379,13 +1499,18 @@ platforms: name: "Wallstreetcn" # Add more platforms... ``` -If you don't know how to look, you can directly copy the partially organized [Platform Configuration](https://github.com/sansan0/TrendRadar/issues/95) + +> 💡 **Shortcut**: If you don't know how to read source code, you can copy from others' organized [Platform Configuration Summary](https://github.com/sansan0/TrendRadar/issues/95) + +> ⚠️ **Note**: More platforms is not always better, suggest choosing 10-15 core platforms. Too many platforms will cause information overload and actually reduce user experience. ### 2. Keyword Configuration -Configure monitoring keywords in `frequency_words.txt` with four syntax types and grouping features. +**Configuration Location:** `config/frequency_words.txt` + +Configure monitoring keywords in `frequency_words.txt` with five syntax types, region markers, and grouping features. 
| Syntax Type | Symbol | Purpose | Example | Matching Logic | |------------|--------|---------|---------|----------------| @@ -1393,6 +1518,7 @@ Configure monitoring keywords in `frequency_words.txt` with four syntax types an | **Required** | `+` | Scope limiting | `+phone` | Must include both | | **Filter** | `!` | Noise exclusion | `!ad` | Exclude if included | | **Count Limit** | `@` | Control display count | `@10` | Max 10 news (v3.2.0 new) | +| **Global Filter** | `[GLOBAL_FILTER]` | Globally exclude content | See example below | Filter under any circumstances (v3.5.0 new) | #### 2.1 Basic Syntax @@ -1437,6 +1563,55 @@ Musk **Priority:** `@number` > Global config > Unlimited +##### 5. **Global Filter** `[GLOBAL_FILTER]` - Globally Exclude Content (v3.5.0 new) +```txt +[GLOBAL_FILTER] +advertisement +promotion +marketing +shocking +clickbait + +[WORD_GROUPS] +technology +AI + +Huawei +HarmonyOS +!car +``` +**Effect:** Filters news containing specified words under **any circumstances**, with **highest priority** + +**Use Cases:** +- Filter low-quality content: shocking, clickbait, breaking news, etc. +- Filter marketing content: advertisement, promotion, sponsorship, etc. 
+- Filter specific topics: entertainment, gossip (based on needs) + +**Filter Priority:** Global Filter > Group Filter(`!`) > Group Matching + +**Region Markers:** +- `[GLOBAL_FILTER]`: Global filter region, words are filtered under any circumstances +- `[WORD_GROUPS]`: Keyword groups region, maintains existing syntax (`!`, `+`, `@`) +- If no region markers are used, all content is treated as keyword groups (backward compatible) + +**Matching Examples:** +```txt +[GLOBAL_FILTER] +advertisement + +[WORD_GROUPS] +technology +AI +``` +- ❌ "Advertisement: Latest tech product launch" ← Contains global filter word "advertisement", rejected +- ✅ "Tech company launches new AI product" ← No global filter words, matches "technology" group +- ✅ "AI technology breakthrough draws attention" ← No global filter words, matches "AI" in "technology" group + +**Important Notes:** +- Use global filter words carefully to avoid over-filtering and missing valuable content +- Recommended to keep global filter words under 5-15 +- For group-specific filtering, prioritize using group filter words (`!` prefix) + --- #### 🔗 Group Feature - Importance of Empty Lines @@ -1625,6 +1800,15 @@ BYD 👉 Click to expand: Three Push Modes Detailed Comparison
+**Configuration Location:** `report.mode` in `config/config.yaml` + +```yaml +report: + mode: "daily" # Options: "daily" | "incremental" | "current" +``` + +**Docker Environment Variable:** `REPORT_MODE=incremental` + #### Detailed Comparison Table | Mode | Target Users | Push Timing | Display Content | Typical Use Case | @@ -1678,6 +1862,15 @@ Assume you monitor "Apple" keyword, execute once per hour: 👉 Click to expand: Hotspot Weight Adjustment
+**Configuration Location:** `weight` section in `config/config.yaml` + +```yaml +weight: + rank_weight: 0.6 # Ranking weight + frequency_weight: 0.3 # Frequency weight + hotness_weight: 0.1 # Hotness weight +``` + Current default configuration is balanced. #### Two Core Scenarios @@ -1775,50 +1968,33 @@ Updated: 2025-01-15 12:30:15 👉 Click to expand: Complete Docker Deployment Guide
-#### Method 1: Quick Experience (One-Line Command) +**Image Description:** -**Linux/macOS System:** -```bash -# Create config directory and download config files -mkdir -p config output -wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/config.yaml -P config/ -wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/frequency_words.txt -P config/ -``` -Or **Manual Creation**: -1. Create `config` folder in current directory -2. Download config files: - - Visit https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/config.yaml → Right-click "Save As" → Save to `config\config.yaml` - - Visit https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/frequency_words.txt → Right-click "Save As" → Save to `config\frequency_words.txt` +TrendRadar provides two independent Docker images, deploy according to your needs: -Final directory structure should be: -``` -current directory/ -└── config/ - ├── config.yaml - └── frequency_words.txt -``` +| Image Name | Purpose | Description | +|---------|------|------| +| `wantcat/trendradar` | News Push Service | Scheduled news crawling, push notifications (Required) | +| `wantcat/trendradar-mcp` | AI Analysis Service | MCP protocol support, AI dialogue analysis (Optional) | -```bash -docker run -d --name trend-radar \ - -v ./config:/app/config:ro \ - -v ./output:/app/output \ - -e FEISHU_WEBHOOK_URL="your feishu webhook" \ - -e DINGTALK_WEBHOOK_URL="your dingtalk webhook" \ - -e WEWORK_WEBHOOK_URL="your wework webhook" \ - -e TELEGRAM_BOT_TOKEN="your telegram_bot_token" \ - -e TELEGRAM_CHAT_ID="your telegram_chat_id" \ - -e EMAIL_FROM="your sender email" \ - -e EMAIL_PASSWORD="your email password or auth code" \ - -e EMAIL_TO="recipient email" \ - -e CRON_SCHEDULE="*/30 * * * *" \ - -e RUN_MODE="cron" \ - -e IMMEDIATE_RUN="true" \ - wantcat/trendradar:latest -``` +> 💡 **Recommendations**: +> - Only need push functionality: Deploy `wantcat/trendradar` image only +> - Need AI 
analysis: Deploy both images -#### Method 2: Using docker-compose (Recommended) +--- + +#### Method 1: Using docker-compose (Recommended) 1. **Create Project Directory and Config**: + + **Method 1-A: Using git clone (Recommended, Simplest)** + ```bash + # Clone project to local + git clone https://github.com/sansan0/TrendRadar.git + cd TrendRadar + ``` + + **Method 1-B: Using wget to download config files** ```bash # Create directory structure mkdir -p trendradar/{config,docker} @@ -1829,11 +2005,11 @@ docker run -d --name trend-radar \ wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/frequency_words.txt -P config/ # Download docker-compose config - wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/.env - wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker-compose.yml + wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/.env -P docker/ + wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker-compose.yml -P docker/ ``` -Final directory structure should be: + > 💡 **Note**: Key directory structure required for Docker deployment: ``` current directory/ ├── config/ @@ -1858,10 +2034,13 @@ current directory/ | `ENABLE_CRAWLER` | `crawler.enable_crawler` | `true` / `false` | Enable crawler | | `ENABLE_NOTIFICATION` | `notification.enable_notification` | `true` / `false` | Enable notification | | `REPORT_MODE` | `report.mode` | `daily` / `incremental` / `current`| Report mode | + | `MAX_ACCOUNTS_PER_CHANNEL` | `notification.max_accounts_per_channel` | `3` | Maximum accounts per channel | | `PUSH_WINDOW_ENABLED` | `notification.push_window.enabled` | `true` / `false` | Push time window switch | | `PUSH_WINDOW_START` | `notification.push_window.time_range.start` | `08:00` | Push start time | | `PUSH_WINDOW_END` | `notification.push_window.time_range.end` | `22:00` | Push end time | - | `FEISHU_WEBHOOK_URL` | `notification.webhooks.feishu_url` | `https://...` | 
Feishu Webhook | + | `ENABLE_WEBSERVER` | - | `true` / `false` | Auto-start web server | + | `WEBSERVER_PORT` | - | `8080` | Web server port (default 8080) | + | `FEISHU_WEBHOOK_URL` | `notification.webhooks.feishu_url` | `https://...` | Feishu Webhook (supports multi-account, use `;` separator) | **Config Priority**: Environment Variables > config.yaml @@ -1872,22 +2051,52 @@ current directory/ 3. **Start Service**: + + **Option A: Start All Services (Push + AI Analysis)** ```bash - # Pull latest image and start + # Pull latest images docker-compose pull + + # Start all services (trend-radar + trend-radar-mcp) docker-compose up -d ``` -4. **Check Running Status**: + **Option B: Start News Push Service Only** ```bash - # View logs - docker logs -f trend-radar - - # View container status - docker ps | grep trend-radar + # Start trend-radar only (scheduled crawling and push) + docker-compose pull trend-radar + docker-compose up -d trend-radar ``` -#### Method 3: Local Build (Developer Option) + **Option C: Start MCP AI Analysis Service Only** + ```bash + # Start trend-radar-mcp only (AI analysis interface) + docker-compose pull trend-radar-mcp + docker-compose up -d trend-radar-mcp + ``` + + > 💡 **Tips**: + > - Most users only need to start `trend-radar` for news push functionality + > - Only start `trend-radar-mcp` when using Claude/ChatGPT for AI dialogue analysis + > - Both services are independent and can be flexibly combined + +4. 
**Check Running Status**: + ```bash + # View news push service logs + docker logs -f trend-radar + + # View MCP AI analysis service logs + docker logs -f trend-radar-mcp + + # View all container status + docker ps | grep trend-radar + + # Stop specific service + docker-compose stop trend-radar # Stop push service + docker-compose stop trend-radar-mcp # Stop MCP service + ``` + +#### Method 2: Local Build (Developer Option) If you need custom code modifications or build your own image: @@ -1903,17 +2112,38 @@ vim config/frequency_words.txt # Use build version docker-compose cd docker cp docker-compose-build.yml docker-compose.yml +``` -# Build and start +**Build and Start Services**: + +```bash +# Option A: Build and start all services docker-compose build docker-compose up -d + +# Option B: Build and start news push service only +docker-compose build trend-radar +docker-compose up -d trend-radar + +# Option C: Build and start MCP AI analysis service only +docker-compose build trend-radar-mcp +docker-compose up -d trend-radar-mcp ``` +> 💡 **Architecture Parameter Notes**: +> - Default builds `amd64` architecture images (suitable for most x86_64 servers) +> - To build `arm64` architecture (Apple Silicon, Raspberry Pi, etc.), set environment variable: +> ```bash +> export DOCKER_ARCH=arm64 +> docker-compose build +> ``` + #### Image Update ```bash -# Method 1: Manual update +# Method 1: Manual update (Crawler + MCP images) docker pull wantcat/trendradar:latest +docker pull wantcat/trendradar-mcp:latest docker-compose down docker-compose up -d @@ -1922,6 +2152,13 @@ docker-compose pull docker-compose up -d ``` +**Available Images**: + +| Image Name | Purpose | Description | +|---------|------|---------| +| `wantcat/trendradar` | News Push Service | Scheduled news crawling, push notifications | +| `wantcat/trendradar-mcp` | MCP Service | AI analysis features (optional) | + #### Service Management Commands ```bash @@ -1940,6 +2177,11 @@ docker exec -it trend-radar python 
manage.py config # Display output files docker exec -it trend-radar python manage.py files +# Web server management (for browser access to generated reports) +docker exec -it trend-radar python manage.py start_webserver # Start web server +docker exec -it trend-radar python manage.py stop_webserver # Stop web server +docker exec -it trend-radar python manage.py webserver_status # Check web server status + # View help info docker exec -it trend-radar python manage.py help @@ -1953,10 +2195,52 @@ docker stop trend-radar docker rm trend-radar ``` +> 💡 **Web Server Notes**: +> - After starting, access latest report at `http://localhost:8080` +> - Access historical reports via directory navigation (e.g., `http://localhost:8080/2025年xx月xx日/`) +> - Port can be configured in `.env` file with `WEBSERVER_PORT` parameter +> - Auto-start: Set `ENABLE_WEBSERVER=true` in `.env` +> - Security: Static files only, limited to output directory, localhost binding only + #### Data Persistence Generated reports and data are saved in `./output` directory by default. Data persists even if container is restarted or removed. +**📊 Web Report Access Paths**: + +TrendRadar generates daily summary HTML reports to two locations simultaneously: + +| File Location | Access Method | Use Case | +|--------------|---------------|----------| +| `output/index.html` | Direct host access | **Docker Deployment** (via Volume mount, visible on host) | +| `index.html` | Root directory access | **GitHub Pages** (repository root, auto-detected by Pages) | +| `output/YYYY年MM月DD日/html/当日汇总.html` | Historical reports | All environments (archived by date) | + +**Local Access Examples**: +```bash +# Method 1: Via Web Server (recommended, Docker environment) +# 1. Start web server +docker exec -it trend-radar python manage.py start_webserver +# 2. 
Access in browser +http://localhost:8080 # Access latest report (default index.html) +http://localhost:8080/2025年xx月xx日/ # Access reports for specific date +http://localhost:8080/2025年xx月xx日/html/ # Browse all HTML files for that date + +# Method 2: Direct file access (local environment) +open ./output/index.html # macOS +start ./output/index.html # Windows +xdg-open ./output/index.html # Linux + +# Method 3: Access historical archives +open ./output/2025年xx月xx日/html/当日汇总.html +``` + +**Why two index.html files?** +- `output/index.html`: Docker Volume mounted to host, can be opened locally +- `index.html`: Pushed to repository by GitHub Actions, auto-deployed by GitHub Pages + +> 💡 **Tip**: Both files have identical content, choose either one to access. + #### Troubleshooting ```bash @@ -1973,6 +2257,521 @@ docker exec -it trend-radar /bin/bash docker exec -it trend-radar ls -la /app/config/ ``` +#### MCP Service Deployment (AI Analysis Feature) + +If you need to use AI analysis features, you can deploy the standalone MCP service container. 
+ +**Architecture Description**: + +```mermaid +flowchart TB + subgraph trend-radar["trend-radar"] + A1[Scheduled News Fetching] + A2[Push Notifications] + end + + subgraph trend-radar-mcp["trend-radar-mcp"] + B1[127.0.0.1:3333] + B2[AI Analysis API] + end + + subgraph shared["Shared Volume"] + C1["config/ (ro)"] + C2["output/ (ro)"] + end + + trend-radar --> shared + trend-radar-mcp --> shared +``` + +**Quick Start**: + +Use docker-compose to start both news push and MCP services: + +```bash +# Download latest docker-compose.yml (includes MCP service config) +wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker-compose.yml + +# Start all services +docker-compose up -d + +# Check running status +docker ps | grep trend-radar +``` + +**Start MCP Service Separately**: + +```bash +docker run -d --name trend-radar-mcp \ + -p 127.0.0.1:3333:3333 \ + -v ./config:/app/config:ro \ + -v ./output:/app/output:ro \ + -e TZ=Asia/Shanghai \ + wantcat/trendradar-mcp:latest +``` + +**Verify Service**: + +```bash +# Check if MCP service is running properly +curl http://127.0.0.1:3333/mcp + +# View MCP service logs +docker logs -f trend-radar-mcp +``` + +**Configure in AI Clients**: + +After MCP service starts, configure in Claude Desktop, Cherry Studio, Cursor, etc.: + +```json +{ + "mcpServers": { + "trendradar": { + "url": "http://127.0.0.1:3333/mcp", + "description": "TrendRadar News Trending Analysis" + } + } +} +``` + +> 💡 **Tip**: MCP service only listens on local port (127.0.0.1) for security. For remote access, configure reverse proxy and authentication yourself. + + + +### 7. Report Configuration + +
+👉 Click to expand: Report-Related Parameter Configuration +
+ +**Configuration Location:** `report` section in `config/config.yaml` + +```yaml +report: + mode: "daily" # Push mode + rank_threshold: 5 # Ranking highlight threshold + sort_by_position_first: false # Sorting priority + max_news_per_keyword: 0 # Maximum display count per keyword + reverse_content_order: false # Content order configuration +``` + +#### Configuration Details + +| Config Item | Type | Default | Description | +|------------|------|---------|-------------| +| `mode` | string | `daily` | Push mode, options: `daily`/`incremental`/`current`, see [Push Mode Details](#3-push-mode-details) | +| `rank_threshold` | int | `5` | Ranking highlight threshold, news with rank ≤ this value will be displayed in bold | +| `sort_by_position_first` | bool | `false` | Sorting priority: `false`=sort by news count, `true`=sort by config position | +| `max_news_per_keyword` | int | `0` | Maximum display count per keyword, `0`=unlimited | +| `reverse_content_order` | bool | `false` | Content order: `false`=trending keywords stats first, `true`=new trending news first | + +#### Content Order Configuration (v3.5.0 New) + +Controls display order of two content sections in push messages and HTML reports: + +| Config Value | Display Order | +|-------------|--------------| +| `false` (default) | ① Trending Keywords Stats → ② New Trending News | +| `true` | ① New Trending News → ② Trending Keywords Stats | + +**Use Cases:** +- `false` (default): Suitable for users focusing on keyword match results, view categorized stats first +- `true`: Suitable for users focusing on latest updates, prioritize viewing new trending topics + +**Docker Environment Variable:** +```bash +REVERSE_CONTENT_ORDER=true +``` + +#### Sorting Priority Configuration + +**Example Scenario:** Config order A, B, C, news count A(3), B(10), C(5) + +| Config Value | Display Order | Use Case | +|-------------|--------------|----------| +| `false` (default) | B(10) → C(5) → A(3) | Focus on popularity trends | +| 
`true` | A(3) → B(10) → C(5) | Focus on personal priority | + +**Docker Environment Variables:** +```bash +SORT_BY_POSITION_FIRST=true +MAX_NEWS_PER_KEYWORD=10 +``` + +
+ +### 8. Push Window Configuration + +
+👉 Click to expand: Push Time Window Control Details +
+ +**Configuration Location:** `notification.push_window` section in `config/config.yaml` + +```yaml +notification: + push_window: + enabled: false # Whether to enable + time_range: + start: "20:00" # Start time (Beijing time) + end: "22:00" # End time (Beijing time) + once_per_day: true # Push only once per day + push_record_retention_days: 7 # Push record retention days +``` + +#### Configuration Details + +| Config Item | Type | Default | Description | +|------------|------|---------|-------------| +| `enabled` | bool | `false` | Whether to enable push time window control | +| `time_range.start` | string | `"20:00"` | Push window start time (Beijing time, HH:MM format) | +| `time_range.end` | string | `"22:00"` | Push window end time (Beijing time, HH:MM format) | +| `once_per_day` | bool | `true` | `true`=push only once per day within window, `false`=push every execution within window | +| `push_record_retention_days` | int | `7` | Push record retention days (used to determine if already pushed) | + +#### Use Cases + +| Scenario | Configuration Example | +|----------|---------------------| +| **Working Hours Push** | `start: "09:00"`, `end: "18:00"`, `once_per_day: false` | +| **Evening Summary Push** | `start: "20:00"`, `end: "22:00"`, `once_per_day: true` | +| **Lunch Break Push** | `start: "12:00"`, `end: "13:00"`, `once_per_day: true` | + +#### Important Notice + +> ⚠️ **GitHub Actions Users Note:** +> - GitHub Actions execution time is unstable, may have ±15 minutes deviation +> - Time range should be at least **2 hours** wide +> - For precise timed push, recommend **Docker deployment** on personal server + +#### Docker Environment Variables + +```bash +PUSH_WINDOW_ENABLED=true +PUSH_WINDOW_START=09:00 +PUSH_WINDOW_END=18:00 +PUSH_WINDOW_ONCE_PER_DAY=false +PUSH_WINDOW_RETENTION_DAYS=7 +``` + +#### Complete Configuration Examples + +**Scenario: Push once between 8-10 PM daily** + +```yaml +notification: + push_window: + enabled: true + time_range: + start: 
"20:00" + end: "22:00" + once_per_day: true + push_record_retention_days: 7 +``` + +**Scenario: Push every hour during working hours** + +```yaml +notification: + push_window: + enabled: true + time_range: + start: "09:00" + end: "18:00" + once_per_day: false + push_record_retention_days: 7 +``` + +
+ +### 9. Execution Frequency Configuration + +
+👉 Click to expand: Automatic Execution Frequency Settings +
+**Configuration Location:** `schedule` section in `.github/workflows/crawler.yml`
+
+```yaml
+on:
+  schedule:
+    - cron: "0 * * * *" # Run every hour
+```
+
+#### What is a Cron Expression?
+
+Cron is a time-based job scheduler format, consisting of 5 parts: `minute hour day month weekday`
+
+```
+┌───────────── minute (0-59)
+│ ┌───────────── hour (0-23)
+│ │ ┌───────────── day (1-31)
+│ │ │ ┌───────────── month (1-12)
+│ │ │ │ ┌───────────── weekday (0-6, 0=Sunday)
+│ │ │ │ │
+* * * * *
+```
+
+#### Common Configuration Examples
+
+| Desired Effect | Cron Expression | Description |
+|---------------|----------------|-------------|
+| Every hour | `0 * * * *` | Run at minute 0 of every hour (default) |
+| Every 30 minutes | `*/30 * * * *` | Run every 30 minutes |
+| Daily at 8 AM | `0 0 * * *` | UTC 0:00 = Beijing 8:00 AM |
+| Working hours | `*/30 0-14 * * *` | Beijing 8:00-22:30, every 30 minutes |
+| 3 times daily | `0 0,6,12 * * *` | Beijing 8:00, 14:00, 20:00 |
+
+#### Important Notes
+
+> ⚠️ **Time Zone Note**: GitHub Actions uses **UTC time**, Beijing time needs to **subtract 8 hours**
+> - Want Beijing 8:00 AM run → Set UTC 0:00
+> - Want Beijing 8:00 PM run → Set UTC 12:00
+
+> ⚠️ **Frequency Limit**: GitHub has a limit on Actions execution count per account
+> - **Recommendation**: Don't set intervals shorter than 30 minutes
+> - **Reason**: Too frequent may be considered abuse, facing account ban risk
+> - **Reality**: GitHub Actions execution time has inherent deviation, setting too precise is meaningless
+
+#### Modification Method
+
+1. Open your forked repository
+2. Find `.github/workflows/crawler.yml` file
+3. Click edit (pencil icon)
+4. Modify the expression in `cron: "0 * * * *"`
+5. Click "Commit changes" to save
+
+ +### 10. Multiple Account Push Configuration + +
+👉 Click to expand: Multiple Account Push Configuration Guide +
+ +**Configuration Location:** `notification` section in `config/config.yaml` + +> ### ⚠️ **Security Warning** +> **GitHub Fork Users: DO NOT configure push information in `config.yaml`!** +> +> - **Risk**: `config.yaml` will be committed to public Git repositories. Configuring push information (Webhook URLs, Tokens, etc.) will expose sensitive data +> - **Recommended Methods**: +> - **GitHub Actions Users** → Use GitHub Secrets environment variables +> - **Docker Users** → Use [`.env` file configuration](#6-docker-deployment) (`.env` is in `.gitignore` and won't be committed) +> - **Local Development Users**: Can configure in `config.yaml` (ensure it won't be pushed to public repositories) + +#### Supported Channels + +| Channel | Configuration Item | Pairing Required | Description | +|---------|-------------------|-----------------|-------------| +| **Feishu** | `feishu_url` | No | Multiple webhook URLs | +| **DingTalk** | `dingtalk_url` | No | Multiple webhook URLs | +| **WeWork** | `wework_url` | No | Multiple webhook URLs | +| **Telegram** | `telegram_bot_token` + `telegram_chat_id` | ✅ Yes | Token and chat_id quantities must match | +| **ntfy** | `ntfy_topic` + `ntfy_token` | ✅ Yes | Topic and token quantities must match (token optional) | +| **Bark** | `bark_url` | No | Multiple push URLs | +| **Slack** | `slack_webhook_url` | No | Multiple webhook URLs | +| **Email** | `email_to` | - | Already supports multiple recipients (comma-separated), no modification needed | + +#### Recommended Method 1: GitHub Actions Environment Variables + +**Configuration Location**: GitHub Repo → Settings → Secrets and variables → Actions → Repository secrets + +**Basic Configuration Example**: +```bash +# Multi-account quantity limit +MAX_ACCOUNTS_PER_CHANNEL=3 + +# Feishu multi-account (3 groups) +FEISHU_WEBHOOK_URL=https://hook1.feishu.cn/xxx;https://hook2.feishu.cn/yyy;https://hook3.feishu.cn/zzz + +# DingTalk multi-account (2 groups) 
+DINGTALK_WEBHOOK_URL=https://oapi.dingtalk.com/xxx;https://oapi.dingtalk.com/yyy + +# WeWork multi-account (2 groups) +WEWORK_WEBHOOK_URL=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx;https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=yyy + +# Bark multi-account (2 devices) +BARK_URL=https://api.day.app/key1;https://api.day.app/key2 + +# Slack multi-account (2 channels) +SLACK_WEBHOOK_URL=https://hooks.slack.com/xxx;https://hooks.slack.com/yyy +``` + +**Paired Configuration Examples (Telegram and ntfy)**: + +
+Telegram Paired Configuration + +```bash +# ✅ Correct: 2 tokens correspond to 2 chat_ids +TELEGRAM_BOT_TOKEN=123456:AAA-BBB;789012:CCC-DDD +TELEGRAM_CHAT_ID=-100111;-100222 + +# ❌ Incorrect: quantities don't match, push will be skipped +TELEGRAM_BOT_TOKEN=token1;token2;token3 +TELEGRAM_CHAT_ID=id1;id2 +``` + +**Note**: The quantities of `token` and `chat_id` must match exactly, otherwise the channel push will be skipped. + +
+ +
+ntfy Paired Configuration + +```bash +# ✅ Correct: 3 topics, only the 2nd needs a token +NTFY_TOPIC=topic1;topic2;topic3 +NTFY_TOKEN=;token_for_topic2; + +# ✅ Correct: 2 topics both need tokens +NTFY_TOPIC=topic1;topic2 +NTFY_TOKEN=token1;token2 + +# ❌ Incorrect: topic and token quantities don't match +NTFY_TOPIC=topic1;topic2 +NTFY_TOKEN=token1;token2;token3 +``` + +**Notes**: +- If a topic doesn't need a token, leave it empty at the corresponding position (between two semicolons) +- The quantities of `topic` and `token` must match + +
+ +--- + +#### Recommended Method 2: Docker Environment Variables (.env) + +**Configuration Location**: `docker/.env` file in project root directory + +**Basic Configuration Example**: +```bash +# Multi-account quantity limit +MAX_ACCOUNTS_PER_CHANNEL=3 + +# Feishu multi-account (3 groups) +FEISHU_WEBHOOK_URL=https://hook1.feishu.cn/xxx;https://hook2.feishu.cn/yyy;https://hook3.feishu.cn/zzz + +# DingTalk multi-account (2 groups) +DINGTALK_WEBHOOK_URL=https://oapi.dingtalk.com/xxx;https://oapi.dingtalk.com/yyy + +# WeWork multi-account (2 groups) +WEWORK_WEBHOOK_URL=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx;https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=yyy + +# Bark multi-account (2 devices) +BARK_URL=https://api.day.app/key1;https://api.day.app/key2 + +# Slack multi-account (2 channels) +SLACK_WEBHOOK_URL=https://hooks.slack.com/xxx;https://hooks.slack.com/yyy +``` + +**Paired Configuration Examples (Telegram and ntfy)**: + +
+Telegram Paired Configuration + +```bash +# ✅ Correct: 2 tokens correspond to 2 chat_ids +TELEGRAM_BOT_TOKEN=123456:AAA-BBB;789012:CCC-DDD +TELEGRAM_CHAT_ID=-100111;-100222 + +# ❌ Incorrect: quantities don't match, push will be skipped +TELEGRAM_BOT_TOKEN=token1;token2;token3 +TELEGRAM_CHAT_ID=id1;id2 +``` + +**Note**: The quantities of `token` and `chat_id` must match exactly, otherwise the channel push will be skipped. + +
+ +
+ntfy Paired Configuration + +```bash +# ✅ Correct: 3 topics, only the 2nd needs a token +NTFY_TOPIC=topic1;topic2;topic3 +NTFY_TOKEN=;token_for_topic2; + +# ✅ Correct: 2 topics both need tokens +NTFY_TOPIC=topic1;topic2 +NTFY_TOKEN=token1;token2 + +# ❌ Incorrect: topic and token quantities don't match +NTFY_TOPIC=topic1;topic2 +NTFY_TOKEN=token1;token2;token3 +``` + +**Notes**: +- If a topic doesn't need a token, leave it empty at the corresponding position (between two semicolons) +- The quantities of `topic` and `token` must match + +
+ +--- + +#### Push Behavior Description + +1. **Independent Push**: Each account sends independently, one failure doesn't affect other accounts +2. **Partial Success**: As long as one account sends successfully, the overall result is considered successful +3. **Log Differentiation**: Multi-account logs show "Account 1", "Account 2", etc. +4. **Batch Interval**: Multi-account increases total send time (each account independently calculates batch interval) + +--- + +#### FAQ + +
+Q1: What happens if more than 3 accounts are configured? +
+The system will automatically truncate to the configured maximum quantity and output warning logs. You can adjust the limit via `max_accounts_per_channel`.
+
+**⚠️ Special Note for GitHub Actions Users**:
+- **Not recommended to configure too many accounts** (suggest no more than 3), which may cause:
+  - **Triggering GitHub Actions rate limits**: Frequent network requests may be identified as abnormal behavior
+  - **Affecting Workflow execution time**: Multi-account pushing will extend runtime and may exceed free quotas
+  - **Potential account risks**: Excessive use of GitHub Actions resources may affect account status
+- **Recommended Practices**:
+  - Use `max_accounts_per_channel: 3` or lower values
+  - Adjust Cron execution frequency (e.g., change from hourly to every 2-3 hours)
+  - Prioritize the most important push channels and avoid configuring too many accounts
+
+ +
+Q2: Will multi-accounts affect push speed? +
+ +Yes. Each account sends independently, total time = number of accounts × single account send time. Recommend controlling the number of accounts. + +
+ +
+Q3: How can local development users configure in config.yaml? +
+ +If you are doing local development and **will not push code to public repositories**, you can configure directly in `config/config.yaml`: + +```yaml +notification: + enable_notification: true + max_accounts_per_channel: 3 + + webhooks: + feishu_url: "https://hook1.feishu.cn/xxx;https://hook2.feishu.cn/yyy" + telegram_bot_token: "token1;token2" + telegram_chat_id: "id1;id2" +``` + +**⚠️ Important Reminder**: +- Ensure `config/config.yaml` is in `.gitignore` (if you will commit code) +- Or only use in local development environment, **never commit to public repositories** + +
+

diff --git a/README.md b/README.md index 4137b87..45ba621 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ [![GitHub Stars](https://img.shields.io/github/stars/sansan0/TrendRadar?style=flat-square&logo=github&color=yellow)](https://github.com/sansan0/TrendRadar/stargazers) [![GitHub Forks](https://img.shields.io/github/forks/sansan0/TrendRadar?style=flat-square&logo=github&color=blue)](https://github.com/sansan0/TrendRadar/network/members) [![License](https://img.shields.io/badge/license-GPL--3.0-blue.svg?style=flat-square)](LICENSE) -[![Version](https://img.shields.io/badge/version-v3.4.1-blue.svg)](https://github.com/sansan0/TrendRadar) +[![Version](https://img.shields.io/badge/version-v3.5.0-blue.svg)](https://github.com/sansan0/TrendRadar) [![MCP](https://img.shields.io/badge/MCP-v1.0.3-green.svg)](https://github.com/sansan0/TrendRadar) [![企业微信通知](https://img.shields.io/badge/企业微信-通知-00D4AA?style=flat-square)](https://work.weixin.qq.com/) @@ -44,14 +44,12 @@ > 本项目以轻量,易部署为目标
-⚠️ 点击展开:查看最新文档(Fork 用户必读) +⚠️ 点击展开:Fork 须知:文档更新、资源限制与部署建议
-最近有很多第一次接触 GitHub 的新用户使用本项目,因此特别补充这个说明。 +**📄 文档版本说明:** -**问题**:如果你是通过 **Fork** 使用本项目,你看到的可能是旧版文档。 - -**原因**:Fork 时会复制当时的文档版本,但原项目可能已更新。 +如果你是通过 **Fork** 使用本项目,你看到的可能是旧版文档。因为 Fork 时会复制当时的文档版本,但原项目可能已更新。 **👉 [点击查看最新官方文档](https://github.com/sansan0/TrendRadar?tab=readme-ov-file)** @@ -59,6 +57,18 @@ - `github.com/你的用户名/TrendRadar` ← 你 fork 的版本 - `github.com/sansan0/TrendRadar` ← 最新官方版本 +--- + +**🛡️ 资源限制与安全提示:** + +GitHub 为每个账号提供的 Actions 资源是有限额的。为了避免被官方判定为滥用而面临封号风险,请注意: + +- **监控平台数量**:建议控制在 **10 个左右**,过多平台会消耗更多资源 +- **执行频率**:建议最短间隔为 **30 分钟**,过于频繁无实际意义 +- **合理使用**:GitHub Actions 适合轻量级定时任务,而非高频爬虫 + +💡 **想要更自由地使用?** 推荐 [🐳 Docker 部署](#6-docker-部署),在自己的服务器上运行。 +

@@ -222,23 +232,25 @@ > > 详细对比和配置教程见 [配置详解 - 推送模式详解](#3-推送模式详解) -**附加功能 - 推送时间窗口控制**(可选): +**附加功能**(可选): -- 设定推送时间范围(如 09:00-18:00),只在指定时间内推送 -- 可配置窗口内多次推送或每天仅推送一次 -- 避免非工作时间打扰 +| 功能 | 说明 | 默认 | +|------|------|------| +| **推送时间窗口控制** | 设定推送时间范围(如 09:00-18:00),避免非工作时间打扰 | 关闭 | +| **内容顺序配置** | 调整"热点词汇统计"和"新增热点新闻"的显示顺序(v3.5.0 新增) | 统计在前 | -> 💡 此功能默认关闭,配置方法见 [快速开始](#-快速开始) +> 💡 详细配置教程见 [配置详解 - 报告配置](#7-报告配置) 和 [配置详解 - 推送时间窗口](#8-推送时间窗口配置) ### **精准内容筛选** 设置个人关键词(如:AI、比亚迪、教育政策),只推送相关热点,过滤无关信息 -**基础语法**(4种): +**基础语法**(5种): - 普通词:基础匹配 - 必须词 `+`:限定范围 - 过滤词 `!`:排除干扰 - 数量限制 `@`:控制显示数量(v3.2.0 新增) +- 全局过滤 `[GLOBAL_FILTER]`:全局排除指定内容(v3.5.0 新增) **高级功能**(v3.2.0 新增): - 🔢 **关键词排序控制**:按热度优先 or 配置顺序优先 @@ -279,6 +291,14 @@ 支持**企业微信**(+ 微信推送方案)、**飞书**、**钉钉**、**Telegram**、**邮件**、**ntfy**、**Bark**、**Slack**,消息直达手机和邮箱 +**📌 多账号推送说明(v3.5.0 新增):** + +- ✅ **支持多账号配置**:所有推送渠道(飞书、钉钉、企业微信、Telegram、ntfy、Bark、Slack)均支持配置多个账号 +- ✅ **配置方式**:使用英文分号 `;` 分隔多个账号值 +- ✅ **示例**:`FEISHU_WEBHOOK_URL` 的 Secret 值填写 `https://webhook1;https://webhook2` +- ⚠️ **配对配置**:Telegram 和 ntfy 需要保证配对参数数量一致(如 token 和 chat_id 都是 2 个) +- ⚠️ **数量限制**:默认每个渠道最多 3 个账号,超出会被截断 + ### **多端适配** - **GitHub Pages**:自动生成精美网页报告,PC/移动端适配 - **Docker部署**:支持多架构容器化运行 @@ -337,6 +357,61 @@ GitHub 一键 Fork 即可使用,无需编程基础。 - **大版本升级**:从 v1.x 升级到 v2.y,建议删除现有 fork 后重新 fork,这样更省力且避免配置冲突 +### 2025/12/03 - v3.5.0 + +**🎉 核心功能增强** + +1. **多账号推送支持** + - 所有推送渠道(飞书、钉钉、企业微信、Telegram、ntfy、Bark、Slack)支持多账号配置 + - 使用分号 `;` 分隔多个账号,例如:`FEISHU_WEBHOOK_URL=url1;url2` + - 自动验证配对配置(如 Telegram 的 token 和 chat_id)数量一致性 + +2. **推送内容顺序可配置** + - 新增 `reverse_content_order` 配置项 + - 支持自定义热点词汇统计与新增热点新闻的显示顺序 + +3. 
**全局过滤关键词** + - 新增 `[GLOBAL_FILTER]` 区域标记,支持全局过滤不想看到的内容 + - 适用场景:过滤广告、营销、低质内容等 + +**🐳 Docker 双路径 HTML 生成优化** + +- **问题修复**:解决 Docker 环境下 `index.html` 无法同步到宿主机的问题 +- **双路径生成**:当日汇总 HTML 同时生成到两个位置 + - `index.html`(项目根目录):供 GitHub Pages 访问 + - `output/index.html`:通过 Docker Volume 挂载,宿主机可直接访问 +- **兼容性**:确保 Docker、GitHub Actions、本地运行环境均能正常访问网页版报告 + +**🐳 Docker MCP 镜像支持** + +- 新增独立的 MCP 服务镜像 `wantcat/trendradar-mcp` +- 支持 Docker 部署 AI 分析功能,通过 HTTP 接口(端口 3333)提供服务 +- 双容器架构:新闻推送服务与 MCP 服务独立运行,可分别扩展和重启 +- 详见 [Docker 部署 - MCP 服务](#6-docker-部署) + +**🌐 Web 服务器支持** + +- 新增内置 Web 服务器,支持通过浏览器访问生成的报告 +- 通过 `manage.py` 命令控制启动/停止:`docker exec -it trend-radar python manage.py start_webserver` +- 访问地址:`http://localhost:8080`(端口可配置) +- 安全特性:静态文件服务、目录限制、本地访问 +- 支持自动启动和手动控制两种模式 + +**📖 文档优化** + +- 新增 [报告配置](#7-报告配置) 章节:report 相关参数详解 +- 新增 [推送时间窗口配置](#8-推送时间窗口配置) 章节:push_window 配置教程 +- 新增 [执行频率配置](#9-执行频率配置) 章节:Cron 表达式说明和常用示例 +- 新增 [多账号推送配置](#10-多账号推送配置) 章节:多账号推送配置详解 +- 优化各配置章节:统一添加"配置位置"说明 +- 简化快速开始配置说明:三个核心文件一目了然 +- 优化 [Docker 部署](#6-docker-部署) 章节:新增镜像说明、推荐 git clone 部署、重组部署方式 + +**🔧 升级说明**: +- **GitHub Fork 用户**:更新 `main.py`、`config/config.yaml`(新增多账号推送支持,无需修改现有配置) +- **多账号推送**:新功能,默认不启用,现有单账号配置不受影响 + + ### 2025/11/26 - mcp-v1.0.3 **MCP 模块更新:** @@ -344,6 +419,11 @@ GitHub 一键 Fork 即可使用,无需编程基础。 - 支持自然语言日期表达式解析(本周、最近7天、上月等) - 工具总数从 13 个增加到 14 个 + +
+👉 点击展开:历史更新 + + ### 2025/11/28 - v3.4.1 **🔧 格式优化** @@ -366,10 +446,6 @@ GitHub 一键 Fork 即可使用,无需编程基础。 - **GitHub Fork 用户**:更新 `main.py`,`config.yaml` -
-👉 点击展开:历史更新 - - ### 2025/11/25 - v3.4.0 **🎉 新增 Slack 推送支持** @@ -760,6 +836,24 @@ frequency_words.txt 文件增加了一个【必须词】功能,使用 + 号 - ⚠️ **严禁自创名称**:Secret 的 Name(名称)必须**严格使用**下方列出的名称(如 `WEWORK_WEBHOOK_URL`、`FEISHU_WEBHOOK_URL` 等),不能自己随意修改或创造新名称,否则系统无法识别 - 💡 **可以同时配置多个平台**:系统会向所有配置的平台发送通知 + **📌 多账号推送说明(v3.5.0 新增):** + + - ✅ **支持多账号配置**:所有推送渠道(飞书、钉钉、企业微信、Telegram、ntfy、Bark、Slack)均支持配置多个账号 + - ✅ **配置方式**:使用英文分号 `;` 分隔多个账号值 + - ✅ **示例**:`FEISHU_WEBHOOK_URL` 的 Secret 值填写 `https://webhook1;https://webhook2` + - ⚠️ **配对配置**:Telegram 和 ntfy 需要保证配对参数数量一致(如 token 和 chat_id 都是 2 个) + - ⚠️ **数量限制**:默认每个渠道最多 3 个账号,超出部分被截断 + + **多账号配置示例**: + + | Name(名称) | Secret(值)示例 | + |-------------|-----------------| + | `FEISHU_WEBHOOK_URL` | `https://webhook1;https://webhook2;https://webhook3` | + | `TELEGRAM_BOT_TOKEN` | `token1;token2` | + | `TELEGRAM_CHAT_ID` | `chatid1;chatid2` | + | `NTFY_TOPIC` | `topic1;topic2` | + | `NTFY_TOKEN` | `;token2`(第一个无 token 时留空占位) | + **配置示例:** GitHub Secrets 配置示例 @@ -1364,15 +1458,21 @@ frequency_words.txt 文件增加了一个【必须词】功能,使用 + 号 3. 点击右侧的 **"Run workflow"** 按钮运行 4. 等待 1 分钟左右,消息会推送到你配置的平台 + > ⏱️ **测试提示**: + > - 手动测试不要太频繁,避免触发 GitHub Actions 限制 + > - 点击 Run workflow 后需要**刷新浏览器页面**才能看到新的运行记录 + 4. **配置说明(可选)**: - > 💡 默认配置已可正常使用,如需个性化调整,可参考以下选项 + > 💡 **默认配置已可正常使用**,如需个性化调整,了解以下三个文件即可 - - **推送设置**:在 [config/config.yaml](config/config.yaml) 中配置推送模式和通知选项 → [推送模式详解](#3-推送模式详解) - - **关键词设置**:在 [config/frequency_words.txt](config/frequency_words.txt) 中添加你关心的关键词 → [关键词配置教程](#2-关键词配置) - - **推送频率调整**:在 [.github/workflows/crawler.yml](.github/workflows/crawler.yml) 请谨慎调整,别贪心 + | 文件 | 作用 | + |------|------| + | `config/config.yaml` | 主配置文件:推送模式、时间窗口、平台列表、热点权重等 | + | `config/frequency_words.txt` | 关键词文件:设置你关心的词汇,筛选推送内容 | + | `.github/workflows/crawler.yml` | 执行频率:控制多久运行一次(⚠️ 谨慎修改) | - **注意**:建议只调整文档中明确说明的配置项,其他选项主要供作者开发时测试使用 + 👉 **详细配置教程**:[配置详解](#配置详解) 5. **🎉 部署成功!分享你的使用体验** @@ -1413,6 +1513,8 @@ frequency_words.txt 文件增加了一个【必须词】功能,使用 + 号 👉 点击展开:自定义监控平台
+**配置位置:** `config/config.yaml` 的 `platforms` 部分 + 本项目的资讯数据来源于 [newsnow](https://github.com/ourongxing/newsnow) ,你可以点击[网站](https://newsnow.busiyi.world/),点击[更多],查看是否有你想要的平台。 具体添加可访问 [项目源代码](https://github.com/ourongxing/newsnow/tree/main/server/sources),根据里面的文件名,在 `config/config.yaml` 文件中修改 `platforms` 配置: @@ -1427,15 +1529,16 @@ platforms: name: "华尔街见闻" # 添加更多平台... ``` -如果不会看的话,可以有选择的复制他人整理好的部分[平台配置](https://github.com/sansan0/TrendRadar/issues/95) -> 💡 平台不是越多越好,别贪心大量信息,你要进行筛选,否则依然只会被大量信息淹没。 +> 💡 **快捷方式**:如果不会看源代码,可以复制他人整理好的 [平台配置汇总](https://github.com/sansan0/TrendRadar/issues/95) + +> ⚠️ **注意**:平台不是越多越好,建议选择 10-15 个核心平台。过多平台会导致信息过载,反而降低使用体验。
### 2. 关键词配置 -在 `frequency_words.txt` 文件中配置监控的关键词,支持四种语法和词组功能。 +在 `frequency_words.txt` 文件中配置监控的关键词,支持五种语法、区域标记和词组功能。 | 语法类型 | 符号 | 作用 | 示例 | 匹配逻辑 | |---------|------|------|------|---------| @@ -1443,6 +1546,7 @@ platforms: | **必须词** | `+` | 限定范围 | `+手机` | 必须同时包含 | | **过滤词** | `!` | 排除干扰 | `!广告` | 包含则直接排除 | | **数量限制** | `@` | 控制显示数量 | `@10` | 最多显示10条新闻(v3.2.0新增) | +| **全局过滤** | `[GLOBAL_FILTER]` | 全局排除指定内容 | 见下方示例 | 任何情况下都过滤(v3.5.0新增) | #### 2.1 基础语法 @@ -1452,6 +1556,8 @@ platforms: 👉 点击展开:基础语法教程
+**配置位置:** `config/frequency_words.txt` + ##### 1. **普通关键词** - 基础匹配 ```txt 华为 @@ -1487,6 +1593,55 @@ OPPO **配置优先级:** `@数字` > 全局配置 > 不限制 +##### 5. **全局过滤** `[GLOBAL_FILTER]` - 全局排除指定内容(v3.5.0 新增) +```txt +[GLOBAL_FILTER] +广告 +推广 +营销 +震惊 +标题党 + +[WORD_GROUPS] +科技 +AI + +华为 +鸿蒙 +!车 +``` +**作用:** 在任何情况下过滤包含指定词的新闻,**优先级最高** + +**使用场景:** +- 过滤低质内容:震惊、标题党、爆料等 +- 过滤营销内容:广告、推广、赞助等 +- 过滤特定主题:娱乐、八卦(根据需求) + +**过滤优先级:** 全局过滤 > 词组内过滤(`!`) > 词组匹配 + +**区域说明:** +- `[GLOBAL_FILTER]`:全局过滤区,包含的词在任何情况下都会被过滤 +- `[WORD_GROUPS]`:词组区,保持现有语法(`!`、`+`、`@`) +- 如果不使用区域标记,默认全部作为词组处理(向后兼容) + +**匹配示例:** +```txt +[GLOBAL_FILTER] +广告 + +[WORD_GROUPS] +科技 +AI +``` +- ❌ "广告:最新科技产品发布" ← 包含全局过滤词"广告",直接拒绝 +- ✅ "科技公司发布AI新产品" ← 不包含全局过滤词,匹配"科技"词组 +- ✅ "AI技术突破引发关注" ← 不包含全局过滤词,匹配"科技"词组中的"AI" + +**注意事项:** +- 全局过滤词应谨慎使用,避免过度过滤导致遗漏有价值内容 +- 建议全局过滤词控制在 5-15 个以内 +- 对于特定词组的过滤,优先使用词组内过滤词(`!` 前缀) + --- #### 🔗 词组功能 - 空行分隔的重要作用 @@ -1672,6 +1827,15 @@ report: 👉 点击展开:三种推送模式详细对比
+**配置位置:** `config/config.yaml` 的 `report.mode` + +```yaml +report: + mode: "daily" # 可选: "daily" | "incremental" | "current" +``` + +**Docker 环境变量:** `REPORT_MODE=incremental` + #### 详细对比表格 | 模式 | 适用人群 | 推送时机 | 显示内容 | 典型使用场景 | @@ -1725,6 +1889,15 @@ report: 👉 点击展开:热点权重调整
+**配置位置:** `config/config.yaml` 的 `weight` 部分 + +```yaml +weight: + rank_weight: 0.6 # 排名权重 + frequency_weight: 0.3 # 频次权重 + hotness_weight: 0.1 # 热度权重 +``` + 当前默认的配置是平衡性配置 #### 两个核心场景 @@ -1822,50 +1995,33 @@ weight: 👉 点击展开:Docker 部署完整指南
-#### 方式一:快速体验(一行命令) +**镜像说明:** -**Linux/macOS 系统:** -```bash -# 创建配置目录并下载配置文件 -mkdir -p config output -wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/config.yaml -P config/ -wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/frequency_words.txt -P config/ -``` -或者**手动创建**: -1. 在当前目录创建 `config` 文件夹 -2. 下载配置文件: - - 访问 https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/config.yaml → 右键"另存为" → 保存到 `config\config.yaml` - - 访问 https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/frequency_words.txt → 右键"另存为" → 保存到 `config\frequency_words.txt` +TrendRadar 提供两个独立的 Docker 镜像,可根据需求选择部署: -完成后的目录结构应该是: -``` -当前目录/ -└── config/ - ├── config.yaml - └── frequency_words.txt -``` +| 镜像名称 | 用途 | 说明 | +|---------|------|------| +| `wantcat/trendradar` | 新闻推送服务 | 定时抓取新闻、推送通知(必选) | +| `wantcat/trendradar-mcp` | AI 分析服务 | MCP 协议支持、AI 对话分析(可选) | -```bash -docker run -d --name trend-radar \ - -v ./config:/app/config:ro \ - -v ./output:/app/output \ - -e FEISHU_WEBHOOK_URL="你的飞书webhook" \ - -e DINGTALK_WEBHOOK_URL="你的钉钉webhook" \ - -e WEWORK_WEBHOOK_URL="你的企业微信webhook" \ - -e TELEGRAM_BOT_TOKEN="你的telegram_bot_token" \ - -e TELEGRAM_CHAT_ID="你的telegram_chat_id" \ - -e EMAIL_FROM="你的发件邮箱" \ - -e EMAIL_PASSWORD="你的邮箱密码或授权码" \ - -e EMAIL_TO="收件人邮箱" \ - -e CRON_SCHEDULE="*/30 * * * *" \ - -e RUN_MODE="cron" \ - -e IMMEDIATE_RUN="true" \ - wantcat/trendradar:latest -``` +> 💡 **建议**: +> - 只需要推送功能:仅部署 `wantcat/trendradar` 镜像 +> - 需要 AI 分析功能:同时部署两个镜像 -#### 方式二:使用 docker-compose(推荐) +--- + +#### 方式一:使用 docker-compose(推荐) 1. 
**创建项目目录和配置**: + + **方式 1-A:使用 git clone(推荐,最简单)** + ```bash + # 克隆项目到本地 + git clone https://github.com/sansan0/TrendRadar.git + cd TrendRadar + ``` + + **方式 1-B:使用 wget 下载配置文件** ```bash # 创建目录结构 mkdir -p trendradar/{config,docker} @@ -1876,11 +2032,11 @@ docker run -d --name trend-radar \ wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/frequency_words.txt -P config/ # 下载 docker-compose 配置 - wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/.env - wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker-compose.yml + wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/.env -P docker/ + wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker-compose.yml -P docker/ ``` -完成后的目录结构应该是: + > 💡 **说明**:Docker 部署需要的关键目录结构如下: ``` 当前目录/ ├── config/ @@ -1905,10 +2061,13 @@ docker run -d --name trend-radar \ | `ENABLE_CRAWLER` | `crawler.enable_crawler` | `true` / `false` | 是否启用爬虫 | | `ENABLE_NOTIFICATION` | `notification.enable_notification` | `true` / `false` | 是否启用通知 | | `REPORT_MODE` | `report.mode` | `daily` / `incremental` / `current`| 报告模式 | + | `MAX_ACCOUNTS_PER_CHANNEL` | `notification.max_accounts_per_channel` | `3` | 每个渠道最大账号数 | | `PUSH_WINDOW_ENABLED` | `notification.push_window.enabled` | `true` / `false` | 推送时间窗口开关 | | `PUSH_WINDOW_START` | `notification.push_window.time_range.start` | `08:00` | 推送开始时间 | | `PUSH_WINDOW_END` | `notification.push_window.time_range.end` | `22:00` | 推送结束时间 | - | `FEISHU_WEBHOOK_URL` | `notification.webhooks.feishu_url` | `https://...` | 飞书 Webhook | + | `ENABLE_WEBSERVER` | - | `true` / `false` | 是否自动启动 Web 服务器 | + | `WEBSERVER_PORT` | - | `8080` | Web 服务器端口(默认 8080) | + | `FEISHU_WEBHOOK_URL` | `notification.webhooks.feishu_url` | `https://...` | 飞书 Webhook(支持多账号,用 `;` 分隔) | **配置优先级**:环境变量 > config.yaml @@ -1919,22 +2078,52 @@ docker run -d --name trend-radar \ 3. 
**启动服务**: + + **选项 A:启动所有服务(推送 + AI 分析)** ```bash - # 拉取最新镜像并启动 + # 拉取最新镜像 docker-compose pull + + # 启动所有服务(trend-radar + trend-radar-mcp) docker-compose up -d ``` -4. **查看运行状态**: + **选项 B:仅启动新闻推送服务** ```bash - # 查看日志 - docker logs -f trend-radar - - # 查看容器状态 - docker ps | grep trend-radar + # 只启动 trend-radar(定时抓取和推送) + docker-compose pull trend-radar + docker-compose up -d trend-radar ``` -#### 方式三:本地构建(开发者选项) + **选项 C:仅启动 MCP AI 分析服务** + ```bash + # 只启动 trend-radar-mcp(提供 AI 分析接口) + docker-compose pull trend-radar-mcp + docker-compose up -d trend-radar-mcp + ``` + + > 💡 **提示**: + > - 大多数用户只需启动 `trend-radar` 即可实现新闻推送功能 + > - 只有需要使用 Claude/ChatGPT 进行 AI 对话分析时,才需启动 `trend-radar-mcp` + > - 两个服务相互独立,可根据需求灵活组合 + +4. **查看运行状态**: + ```bash + # 查看新闻推送服务日志 + docker logs -f trend-radar + + # 查看 MCP AI 分析服务日志 + docker logs -f trend-radar-mcp + + # 查看所有容器状态 + docker ps | grep trend-radar + + # 停止特定服务 + docker-compose stop trend-radar # 停止推送服务 + docker-compose stop trend-radar-mcp # 停止 MCP 服务 + ``` + +#### 方式二:本地构建(开发者选项) 如果需要自定义修改代码或构建自己的镜像: @@ -1950,17 +2139,38 @@ vim config/frequency_words.txt # 使用构建版本的 docker-compose cd docker cp docker-compose-build.yml docker-compose.yml +``` -# 构建并启动 +**构建并启动服务**: + +```bash +# 选项 A:构建并启动所有服务 docker-compose build docker-compose up -d + +# 选项 B:仅构建并启动新闻推送服务 +docker-compose build trend-radar +docker-compose up -d trend-radar + +# 选项 C:仅构建并启动 MCP AI 分析服务 +docker-compose build trend-radar-mcp +docker-compose up -d trend-radar-mcp ``` +> 💡 **架构参数说明**: +> - 默认构建 `amd64` 架构镜像(适用于大多数 x86_64 服务器) +> - 如需构建 `arm64` 架构(Apple Silicon、树莓派等),设置环境变量: +> ```bash +> export DOCKER_ARCH=arm64 +> docker-compose build +> ``` + #### 镜像更新 ```bash -# 方式一:手动更新 +# 方式一:手动更新(爬虫 + MCP 镜像) docker pull wantcat/trendradar:latest +docker pull wantcat/trendradar-mcp:latest docker-compose down docker-compose up -d @@ -1969,6 +2179,13 @@ docker-compose pull docker-compose up -d ``` +**可用镜像**: + +| 镜像名称 | 用途 | 说明 | +|---------|------|------| +| `wantcat/trendradar` | 
新闻推送服务 | 定时抓取新闻、推送通知 | +| `wantcat/trendradar-mcp` | MCP 服务 | AI 分析功能(可选) | + #### 服务管理命令 ```bash @@ -1987,6 +2204,11 @@ docker exec -it trend-radar python manage.py config # 显示输出文件 docker exec -it trend-radar python manage.py files +# Web 服务器管理(用于浏览器访问生成的报告) +docker exec -it trend-radar python manage.py start_webserver # 启动 Web 服务器 +docker exec -it trend-radar python manage.py stop_webserver # 停止 Web 服务器 +docker exec -it trend-radar python manage.py webserver_status # 查看 Web 服务器状态 + # 查看帮助信息 docker exec -it trend-radar python manage.py help @@ -2000,10 +2222,52 @@ docker stop trend-radar docker rm trend-radar ``` +> 💡 **Web 服务器说明**: +> - 启动后可通过浏览器访问 `http://localhost:8080` 查看最新报告 +> - 通过目录导航访问历史报告(如:`http://localhost:8080/2025年xx月xx日/`) +> - 端口可在 `.env` 文件中配置 `WEBSERVER_PORT` 参数 +> - 自动启动:在 `.env` 中设置 `ENABLE_WEBSERVER=true` +> - 安全提示:仅提供静态文件访问,限制在 output 目录,只绑定本地访问 + #### 数据持久化 生成的报告和数据默认保存在 `./output` 目录下,即使容器重启或删除,数据也会保留。 +**📊 网页版报告访问路径**: + +TrendRadar 生成的当日汇总 HTML 报告会同时保存到两个位置: + +| 文件位置 | 访问方式 | 适用场景 | +|---------|---------|---------| +| `output/index.html` | 宿主机直接访问 | **Docker 部署**(通过 Volume 挂载,宿主机可见) | +| `index.html` | 根目录访问 | **GitHub Pages**(仓库根目录,Pages 自动识别) | +| `output/YYYY年MM月DD日/html/当日汇总.html` | 历史报告访问 | 所有环境(按日期归档) | + +**本地访问示例**: +```bash +# 方式 1:通过 Web 服务器访问(推荐,Docker 环境) +# 1. 启动 Web 服务器 +docker exec -it trend-radar python manage.py start_webserver +# 2. 
在浏览器访问 +http://localhost:8080 # 访问最新报告(默认 index.html) +http://localhost:8080/2025年xx月xx日/ # 访问指定日期的报告 +http://localhost:8080/2025年xx月xx日/html/ # 浏览该日期下的所有 HTML 文件 + +# 方式 2:直接打开文件(本地环境) +open ./output/index.html # macOS +start ./output/index.html # Windows +xdg-open ./output/index.html # Linux + +# 方式 3:访问历史归档 +open ./output/2025年xx月xx日/html/当日汇总.html +``` + +**为什么有两个 index.html?** +- `output/index.html`:Docker Volume 挂载到宿主机,本地可直接打开 +- `index.html`:GitHub Actions 推送到仓库,GitHub Pages 自动部署 + +> 💡 **提示**:两个文件内容完全相同,选择任意一个访问即可。 + #### 故障排查 ```bash @@ -2020,6 +2284,513 @@ docker exec -it trend-radar /bin/bash docker exec -it trend-radar ls -la /app/config/ ``` +#### MCP 服务部署(AI 分析功能) + +如果需要使用 AI 分析功能,可以部署独立的 MCP 服务容器。 + +**架构说明**: + +```mermaid +flowchart TB + subgraph trend-radar["trend-radar"] + A1[定时抓取新闻] + A2[推送通知] + end + + subgraph trend-radar-mcp["trend-radar-mcp"] + B1[127.0.0.1:3333] + B2[AI 分析接口] + end + + subgraph shared["共享卷"] + C1["config/ (ro)"] + C2["output/ (ro)"] + end + + trend-radar --> shared + trend-radar-mcp --> shared +``` + +**快速启动**: + +使用 docker-compose 同时启动新闻推送和 MCP 服务: + +```bash +# 下载最新的 docker-compose.yml(已包含 MCP 服务配置) +wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker-compose.yml + +# 启动所有服务 +docker-compose up -d + +# 查看运行状态 +docker ps | grep trend-radar +``` + +**单独启动 MCP 服务**: + +```bash +docker run -d --name trend-radar-mcp \ + -p 127.0.0.1:3333:3333 \ + -v ./config:/app/config:ro \ + -v ./output:/app/output:ro \ + -e TZ=Asia/Shanghai \ + wantcat/trendradar-mcp:latest +``` + +**验证服务**: + +```bash +# 检查 MCP 服务是否正常运行 +curl http://127.0.0.1:3333/mcp + +# 查看 MCP 服务日志 +docker logs -f trend-radar-mcp +``` + +**在 AI 客户端中配置**: + +MCP 服务启动后,在 Claude Desktop、Cherry Studio、Cursor 等客户端中配置: + +```json +{ + "mcpServers": { + "trendradar": { + "url": "http://127.0.0.1:3333/mcp", + "description": "TrendRadar 新闻热点分析" + } + } +} +``` + +> 💡 **提示**:MCP 服务仅监听本地端口(127.0.0.1),确保安全性。如需远程访问,请自行配置反向代理和认证。 + +
+ +### 7. 报告配置 + +
+👉 点击展开:报告相关参数配置 +
+ +**配置位置:** `config/config.yaml` 的 `report` 部分 + +```yaml +report: + mode: "daily" # 推送模式 + rank_threshold: 5 # 排名高亮阈值 + sort_by_position_first: false # 排序优先级 + max_news_per_keyword: 0 # 每个关键词最大显示数量 + reverse_content_order: false # 内容顺序配置 +``` + +#### 配置项详解 + +| 配置项 | 类型 | 默认值 | 说明 | +|-------|------|-------|------| +| `mode` | string | `daily` | 推送模式,可选 `daily`/`incremental`/`current`,详见 [推送模式详解](#3-推送模式详解) | +| `rank_threshold` | int | `5` | 排名高亮阈值,排名 ≤ 该值的新闻会加粗显示 | +| `sort_by_position_first` | bool | `false` | 排序优先级:`false`=按热点条数排序,`true`=按配置位置排序 | +| `max_news_per_keyword` | int | `0` | 每个关键词最大显示数量,`0`=不限制 | +| `reverse_content_order` | bool | `false` | 内容顺序:`false`=热点词汇统计在前,`true`=新增热点新闻在前 | + +#### 内容顺序配置(v3.5.0 新增) + +控制推送消息和 HTML 报告中两部分内容的显示顺序: + +| 配置值 | 显示顺序 | +|-------|---------| +| `false`(默认) | ① 热点词汇统计 → ② 新增热点新闻 | +| `true` | ① 新增热点新闻 → ② 热点词汇统计 | + +**适用场景:** +- `false`(默认):适合关注关键词匹配结果的用户,先看分类统计 +- `true`:适合关注最新动态的用户,优先查看新增热点 + +**Docker 环境变量:** +```bash +REVERSE_CONTENT_ORDER=true +``` + +#### 排序优先级配置 + +**示例场景:** 配置顺序 A、B、C,热点数 A(3条)、B(10条)、C(5条) + +| 配置值 | 显示顺序 | 适用场景 | +|-------|---------|---------| +| `false`(默认) | B(10条) → C(5条) → A(3条) | 关注热度趋势 | +| `true` | A(3条) → B(10条) → C(5条) | 关注个人优先级 | + +**Docker 环境变量:** +```bash +SORT_BY_POSITION_FIRST=true +MAX_NEWS_PER_KEYWORD=10 +``` + +
+ +### 8. 推送时间窗口配置 + +
+👉 点击展开:推送时间窗口控制详解 +
+ +**配置位置:** `config/config.yaml` 的 `notification.push_window` 部分 + +```yaml +notification: + push_window: + enabled: false # 是否启用 + time_range: + start: "20:00" # 开始时间(北京时间) + end: "22:00" # 结束时间(北京时间) + once_per_day: true # 每天只推送一次 + push_record_retention_days: 7 # 推送记录保留天数 +``` + +#### 配置项详解 + +| 配置项 | 类型 | 默认值 | 说明 | +|-------|------|-------|------| +| `enabled` | bool | `false` | 是否启用推送时间窗口控制 | +| `time_range.start` | string | `"20:00"` | 推送时间窗口开始时间(北京时间,HH:MM 格式) | +| `time_range.end` | string | `"22:00"` | 推送时间窗口结束时间(北京时间,HH:MM 格式) | +| `once_per_day` | bool | `true` | `true`=每天在窗口内只推送一次,`false`=窗口内每次执行都推送 | +| `push_record_retention_days` | int | `7` | 推送记录保留天数(用于判断是否已推送) | + +#### 使用场景 + +| 场景 | 配置示例 | +|------|---------| +| **工作时间推送** | `start: "09:00"`, `end: "18:00"`, `once_per_day: false` | +| **晚间汇总推送** | `start: "20:00"`, `end: "22:00"`, `once_per_day: true` | +| **午休时间推送** | `start: "12:00"`, `end: "13:00"`, `once_per_day: true` | + +#### 重要提示 + +> ⚠️ **GitHub Actions 用户注意:** +> - GitHub Actions 执行时间不稳定,可能有 ±15 分钟的偏差 +> - 时间范围建议至少留足 **2 小时** +> - 如果想要精准的定时推送,建议使用 **Docker 部署**在个人服务器上 + +#### Docker 环境变量 + +```bash +PUSH_WINDOW_ENABLED=true +PUSH_WINDOW_START=09:00 +PUSH_WINDOW_END=18:00 +PUSH_WINDOW_ONCE_PER_DAY=false +PUSH_WINDOW_RETENTION_DAYS=7 +``` + +#### 完整配置示例 + +**场景:每天晚上 8-10 点只推送一次汇总** + +```yaml +notification: + push_window: + enabled: true + time_range: + start: "20:00" + end: "22:00" + once_per_day: true + push_record_retention_days: 7 +``` + +**场景:工作时间内每小时推送** + +```yaml +notification: + push_window: + enabled: true + time_range: + start: "09:00" + end: "18:00" + once_per_day: false + push_record_retention_days: 7 +``` + +
+ +### 9. 执行频率配置 + +
+👉 点击展开:自动运行频率设置 +
+ +**配置位置:** `.github/workflows/crawler.yml` 的 `schedule` 部分 + +```yaml +on: + schedule: + - cron: "0 * * * *" # 每小时运行一次 +``` + +#### 什么是 Cron 表达式? + +Cron 是一种定时任务格式,由 5 个部分组成:`分 时 日 月 周` + +``` +┌───────────── 分钟 (0-59) +│ ┌───────────── 小时 (0-23) +│ │ ┌───────────── 日期 (1-31) +│ │ │ ┌───────────── 月份 (1-12) +│ │ │ │ ┌───────────── 星期 (0-6,0=周日) +│ │ │ │ │ +* * * * * +``` + +#### 常用配置示例 + +| 想要的效果 | Cron 表达式 | 说明 | +|-----------|------------|------| +| 每小时运行 | `0 * * * *` | 每小时的第 0 分钟运行(默认) | +| 每 30 分钟运行 | `*/30 * * * *` | 每隔 30 分钟运行一次 | +| 每天早 8 点运行 | `0 0 * * *` | UTC 0:00 = 北京时间 8:00 | +| 工作时间运行 | `*/30 0-14 * * *` | 北京 8:00-22:00,每 30 分钟 | +| 每天 3 次 | `0 0,6,12 * * *` | 北京 8:00、14:00、20:00 | + +#### 重要提示 + +> ⚠️ **时区注意**:GitHub Actions 使用 **UTC 时间**,北京时间需要 **减 8 小时** +> - 想要北京时间 8:00 运行 → 设置 UTC 0:00 +> - 想要北京时间 20:00 运行 → 设置 UTC 12:00 + +> ⚠️ **频率限制**:GitHub 对每个账号的 Actions 运行次数有限额 +> - **建议**:不要设置比 30 分钟更短的间隔 +> - **原因**:过于频繁可能被判定为滥用,面临封号风险 +> - **实际情况**:GitHub Actions 执行时间本身就有偏差,设置太精确意义不大 + +#### 修改方法 + +1. 打开你 fork 的仓库 +2. 找到 `.github/workflows/crawler.yml` 文件 +3. 点击编辑(铅笔图标) +4. 修改 `cron: "0 * * * *"` 中的表达式 +5. 点击 "Commit changes" 保存 + +
+ +### 10. 多账号推送配置 + +
+👉 点击展开:多账号推送配置详解 + +> ### ⚠️ **安全警告** +> **GitHub Fork 用户请勿在 `config.yaml` 中配置推送信息!** +> +> - **风险说明**:`config.yaml` 会被提交到公开的 Git 仓库,配置推送信息(Webhook URL、Token 等)会泄露敏感数据 +> - **推荐方式**: +> - **GitHub Actions 用户** → 使用 GitHub Secrets 环境变量 +> - **Docker 用户** → 使用 [`.env` 文件配置](#6-docker-部署)(`.env` 已在 `.gitignore` 中,不会被提交) +> - **本地开发用户**:可以在 `config.yaml` 中配置(确保不会 push 到公开仓库) + +#### 支持的渠道 + +| 渠道 | 配置项 | 是否需要配对 | 说明 | +|------|--------|-------------|------| +| **飞书** | `feishu_url` | 否 | 多个 webhook URL | +| **钉钉** | `dingtalk_url` | 否 | 多个 webhook URL | +| **企业微信** | `wework_url` | 否 | 多个 webhook URL | +| **Telegram** | `telegram_bot_token` + `telegram_chat_id` | ✅ 是 | token 和 chat_id 数量必须一致 | +| **ntfy** | `ntfy_topic` + `ntfy_token` | ✅ 是 | topic 和 token 数量必须一致(token 可选) | +| **Bark** | `bark_url` | 否 | 多个推送 URL | +| **Slack** | `slack_webhook_url` | 否 | 多个 webhook URL | +| **邮件** | `email_to` | - | 已支持多收件人(逗号分隔),无需修改 | + +#### 推荐配置方式 1:GitHub Actions 环境变量 + +**配置位置**:GitHub Repo → Settings → Secrets and variables → Actions → Repository secrets + +**基础配置示例**: +```bash +# 多账号数量限制 +MAX_ACCOUNTS_PER_CHANNEL=3 + +# 飞书多账号(3个群组) +FEISHU_WEBHOOK_URL=https://hook1.feishu.cn/xxx;https://hook2.feishu.cn/yyy;https://hook3.feishu.cn/zzz + +# 钉钉多账号(2个群组) +DINGTALK_WEBHOOK_URL=https://oapi.dingtalk.com/xxx;https://oapi.dingtalk.com/yyy + +# 企业微信多账号(2个群组) +WEWORK_WEBHOOK_URL=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx;https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=yyy + +# Bark多账号(2个设备) +BARK_URL=https://api.day.app/key1;https://api.day.app/key2 + +# Slack多账号(2个频道) +SLACK_WEBHOOK_URL=https://hooks.slack.com/xxx;https://hooks.slack.com/yyy +``` + +**配对配置示例(Telegram 和 ntfy)**: + +
+Telegram 配对配置 + +```bash +# ✅ 正确配置:2个token对应2个chat_id +TELEGRAM_BOT_TOKEN=123456:AAA-BBB;789012:CCC-DDD +TELEGRAM_CHAT_ID=-100111;-100222 + +# ❌ 错误配置:数量不一致,将跳过推送 +TELEGRAM_BOT_TOKEN=token1;token2;token3 +TELEGRAM_CHAT_ID=id1;id2 +``` + +**说明**:`token` 和 `chat_id` 的数量必须完全一致,否则该渠道推送会被跳过。 + +
+ +
+ntfy 配对配置 + +```bash +# ✅ 正确配置:3个topic,只有第2个需要token +NTFY_TOPIC=topic1;topic2;topic3 +NTFY_TOKEN=;token_for_topic2; + +# ✅ 正确配置:2个topic都需要token +NTFY_TOPIC=topic1;topic2 +NTFY_TOKEN=token1;token2 + +# ❌ 错误配置:topic和token数量不匹配 +NTFY_TOPIC=topic1;topic2 +NTFY_TOKEN=token1;token2;token3 +``` + +**说明**: +- 如果某个 topic 不需要 token,在对应位置留空(两个分号之间) +- `topic` 和 `token` 的数量必须一致 + +
+ +--- + +#### 推荐配置方式 2:Docker 环境变量(.env) + +**配置位置**:项目根目录 `docker/.env` 文件 + +**基础配置示例**: +```bash +# 多账号数量限制 +MAX_ACCOUNTS_PER_CHANNEL=3 + +# 飞书多账号(3个群组) +FEISHU_WEBHOOK_URL=https://hook1.feishu.cn/xxx;https://hook2.feishu.cn/yyy;https://hook3.feishu.cn/zzz + +# 钉钉多账号(2个群组) +DINGTALK_WEBHOOK_URL=https://oapi.dingtalk.com/xxx;https://oapi.dingtalk.com/yyy + +# 企业微信多账号(2个群组) +WEWORK_WEBHOOK_URL=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx;https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=yyy + +# Bark多账号(2个设备) +BARK_URL=https://api.day.app/key1;https://api.day.app/key2 + +# Slack多账号(2个频道) +SLACK_WEBHOOK_URL=https://hooks.slack.com/xxx;https://hooks.slack.com/yyy +``` + +**配对配置示例(Telegram 和 ntfy)**: + +
+Telegram 配对配置 + +```bash +# ✅ 正确配置:2个token对应2个chat_id +TELEGRAM_BOT_TOKEN=123456:AAA-BBB;789012:CCC-DDD +TELEGRAM_CHAT_ID=-100111;-100222 + +# ❌ 错误配置:数量不一致,将跳过推送 +TELEGRAM_BOT_TOKEN=token1;token2;token3 +TELEGRAM_CHAT_ID=id1;id2 +``` + +**说明**:`token` 和 `chat_id` 的数量必须完全一致,否则该渠道推送会被跳过。 + +
+ +
+ntfy 配对配置 + +```bash +# ✅ 正确配置:3个topic,只有第2个需要token +NTFY_TOPIC=topic1;topic2;topic3 +NTFY_TOKEN=;token_for_topic2; + +# ✅ 正确配置:2个topic都需要token +NTFY_TOPIC=topic1;topic2 +NTFY_TOKEN=token1;token2 + +# ❌ 错误配置:topic和token数量不匹配 +NTFY_TOPIC=topic1;topic2 +NTFY_TOKEN=token1;token2;token3 +``` + +**说明**: +- 如果某个 topic 不需要 token,在对应位置留空(两个分号之间) +- `topic` 和 `token` 的数量必须一致 + +
+ +--- + +#### 推送行为说明 + +1. **独立推送**:每个账号独立发送,一个失败不影响其他账号 +2. **部分成功判定**:只要有一个账号发送成功,整体视为成功 +3. **日志区分**:多账号时日志会显示"账号1"、"账号2"等标签 +4. **批次间隔**:多账号会增加总发送时间(每个账号独立计算批次间隔) + +--- + +#### 常见问题 + +
+Q1: 超过 3 个账号会怎样? +
+ +系统会自动截断到配置的最大数量,并输出警告日志。可通过 `max_accounts_per_channel` 调整限制。 + +**⚠️ GitHub Actions 用户特别注意**: +- **不建议配置过多账号**(建议不超过 3 个),可能导致: + - **触发 GitHub Actions 速率限制**:频繁的网络请求可能被识别为异常行为 + - **潜在账号风险**:过度使用 GitHub Actions 资源可能影响账号状态 + +
+ +
+Q2: 多账号会影响推送速度吗? +
+ +会。每个账号独立发送,总时间 = 账号数 × 单账号发送时间。建议控制账号数量。 + +
+ +
+Q3: 本地开发用户如何在 config.yaml 中配置? +
+ +如果你是本地开发且**不会将代码推送到公开仓库**,可以直接在 `config/config.yaml` 中配置: + +```yaml +notification: + enable_notification: true + max_accounts_per_channel: 3 + + webhooks: + feishu_url: "https://hook1.feishu.cn/xxx;https://hook2.feishu.cn/yyy" + telegram_bot_token: "token1;token2" + telegram_chat_id: "id1;id2" +``` + +**⚠️ 重要提醒**: +- 确保 `config/config.yaml` 在 `.gitignore` 中(如果会提交代码) +- 或者只在本地开发环境使用,**绝不提交到公开仓库** + +
+

diff --git a/config/config.yaml b/config/config.yaml index 333bedf..81d7cbf 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -29,6 +29,7 @@ report: rank_threshold: 5 # 排名高亮阈值 sort_by_position_first: false # 排序优先级:true=先按配置位置排序,false=先按热点条数排序 max_news_per_keyword: 0 # 每个关键词最大显示数量,0=不限制 + reverse_content_order: false # 内容顺序:false=热点词汇统计在前,true=新增热点新闻在前 notification: enable_notification: true # 是否启用通知功能,如果 false,则不发送手机通知 @@ -39,6 +40,7 @@ notification: slack_batch_size: 4000 # Slack消息分批大小(字节) batch_send_interval: 3 # 批次发送间隔(秒) feishu_message_separator: "━━━━━━━━━━━━━━━━━━━" # feishu 消息分割线 + max_accounts_per_channel: 3 # 每个渠道最大账号数量,建议不超过 3 # 🕐 推送时间窗口控制(可选功能) # 用途:限制推送的时间范围,避免非工作时间打扰 @@ -71,23 +73,39 @@ notification: # - Minor: Spam notifications flooding your devices # - Severe: Webhook abuse leading to security incidents (malicious messages, phishing links, etc.) # + # ⚠️⚠️⚠️ 多账号推送说明 / MULTI-ACCOUNT PUSH NOTICE ⚠️⚠️⚠️ + # + # 🔸 多账号支持: + # • 请使用分号(;)分隔多个账号,如:"url1;url2;url3" + # • 示例:telegram_bot_token: "token1;token2" 对应 telegram_chat_id: "id1;id2" + # • 对于需要配对的配置(如 Telegram 的 token 和 chat_id),数量必须一致 + # • 每个渠道最多支持 max_accounts_per_channel 个账号(见上方配置) + # • 邮箱已支持多收件人(逗号分隔),保持不变 + # + # 🔸 Multi-Account Support: + # • Use semicolon(;) to separate multiple accounts, e.g., "url1;url2;url3" + # • Example: telegram_bot_token: "token1;token2" with telegram_chat_id: "id1;id2" + # • For paired configs (e.g., Telegram token and chat_id), quantities must match + # • Each channel supports up to max_accounts_per_channel accounts (see above config) + # • Email already supports multiple recipients (comma-separated), unchanged + # webhooks: - feishu_url: "" # 飞书机器人的 webhook URL - dingtalk_url: "" # 钉钉机器人的 webhook URL - wework_url: "" # 企业微信机器人的 webhook URL + feishu_url: "" # 飞书机器人的 webhook URL(多账号用 ; 分隔) + dingtalk_url: "" # 钉钉机器人的 webhook URL(多账号用 ; 分隔) + wework_url: "" # 企业微信机器人的 webhook URL(多账号用 ; 分隔) wework_msg_type: "markdown" # 企业微信消息类型:markdown(群机器人) 或 text(个人微信应用) - 
telegram_bot_token: "" # Telegram Bot Token - telegram_chat_id: "" # Telegram Chat ID + telegram_bot_token: "" # Telegram Bot Token(多账号用 ; 分隔,需与 chat_id 数量一致) + telegram_chat_id: "" # Telegram Chat ID(多账号用 ; 分隔,需与 bot_token 数量一致) email_from: "" # 发件人邮箱地址 email_password: "" # 发件人邮箱密码或授权码 email_to: "" # 收件人邮箱地址,多个收件人用逗号分隔 email_smtp_server: "" # SMTP服务器地址(可选,留空自动识别) email_smtp_port: "" # SMTP端口(可选,留空自动识别) ntfy_server_url: "https://ntfy.sh" # ntfy服务器地址,默认使用公共服务,可改为自托管地址 - ntfy_topic: "" # ntfy主题名称 - ntfy_token: "" # ntfy访问令牌(可选,用于私有主题) - bark_url: "" # Bark推送URL(格式:https://api.day.app/your_device_key 或自建服务器地址) - slack_webhook_url: "" # Slack Incoming Webhook URL(格式:https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX) + ntfy_topic: "" # ntfy主题名称(多账号用 ; 分隔) + ntfy_token: "" # ntfy访问令牌(可选,用于私有主题,多账号用 ; 分隔) + bark_url: "" # Bark推送URL(多账号用 ; 分隔,格式:https://api.day.app/your_device_key 或自建服务器地址) + slack_webhook_url: "" # Slack Incoming Webhook URL(多账号用 ; 分隔,格式:https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX) # 用于让关注度更高的新闻在更前面显示,即用算法重新组合不同平台的热搜排序形成你侧重的热搜,合起来是 1 就行 weight: diff --git a/docker/.env b/docker/.env index 2c73e17..9946b30 100644 --- a/docker/.env +++ b/docker/.env @@ -12,6 +12,21 @@ REPORT_MODE= SORT_BY_POSITION_FIRST= # 每个关键词最大显示数量 (0=不限制,>0=限制数量) MAX_NEWS_PER_KEYWORD= +# 内容顺序:false=热点词汇统计在前,true=新增热点新闻在前 +REVERSE_CONTENT_ORDER= + +# ============================================ +# Web 服务器配置 +# ============================================ + +# 是否自动启动 Web 服务器托管 output 目录 (true/false) +# 启用后可通过 http://localhost:{WEBSERVER_PORT} 访问生成的报告 +# 手动控制:docker exec -it trend-radar python manage.py start_webserver +ENABLE_WEBSERVER=false + +# Web 服务器端口(默认 8080,可自定义避免冲突) +# 注意:修改后需要重启容器生效 +WEBSERVER_PORT=8080 # ============================================ # 推送时间窗口配置 @@ -29,36 +44,47 @@ PUSH_WINDOW_ONCE_PER_DAY= PUSH_WINDOW_RETENTION_DAYS= # ============================================ -# 通知渠道配置 +# 多账号配置 # 
============================================ -# 推送配置 +# 每个渠道最大账号数量(建议不超过 3,避免fork用户触发账号风险) +MAX_ACCOUNTS_PER_CHANNEL= + +# ============================================ +# 通知渠道配置(多账号用 ; 分隔) +# ============================================ + +# 飞书机器人 webhook URL(多账号用 ; 分隔) FEISHU_WEBHOOK_URL= +# Telegram Bot Token(多账号用 ; 分隔,需与 chat_id 数量一致) TELEGRAM_BOT_TOKEN= +# Telegram Chat ID(多账号用 ; 分隔,需与 bot_token 数量一致) TELEGRAM_CHAT_ID= +# 钉钉机器人 webhook URL(多账号用 ; 分隔) DINGTALK_WEBHOOK_URL= +# 企业微信机器人 webhook URL(多账号用 ; 分隔) WEWORK_WEBHOOK_URL= +# 企业微信消息类型(markdown 或 text) WEWORK_MSG_TYPE= +# 邮件配置(邮箱已支持多收件人,逗号分隔) EMAIL_FROM= EMAIL_PASSWORD= EMAIL_TO= EMAIL_SMTP_SERVER= EMAIL_SMTP_PORT= -# ntfy 推送配置 +# ntfy 推送配置(多账号用 ; 分隔,topic 和 token 数量需一致) NTFY_SERVER_URL=https://ntfy.sh -# ntfy主题名称 +# ntfy主题名称(多账号用 ; 分隔) NTFY_TOPIC= -# 可选:访问令牌(用于私有主题) +# 可选:访问令牌(用于私有主题,多账号用 ; 分隔,无令牌的留空占位如 ";token2") NTFY_TOKEN= -# Bark 推送配置 -# Bark推送URL(格式:https://api.day.app/your_device_key 或自建服务器地址) +# Bark 推送配置(多账号用 ; 分隔) BARK_URL= -# Slack 推送配置 -# Slack Incoming Webhook URL(格式:https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX) +# Slack 推送配置(多账号用 ; 分隔) SLACK_WEBHOOK_URL= # ============================================ diff --git a/docker/Dockerfile b/docker/Dockerfile index f86f65c..574eb11 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,9 +2,9 @@ FROM python:3.10-slim WORKDIR /app -# https://github.com/aptible/supercronic +# Latest releases available at https://github.com/aptible/supercronic/releases ARG TARGETARCH -ENV SUPERCRONIC_VERSION=v0.2.34 +ENV SUPERCRONIC_VERSION=v0.2.39 RUN set -ex && \ apt-get update && \ @@ -12,12 +12,12 @@ RUN set -ex && \ case ${TARGETARCH} in \ amd64) \ export SUPERCRONIC_URL=https://github.com/aptible/supercronic/releases/download/${SUPERCRONIC_VERSION}/supercronic-linux-amd64; \ - export SUPERCRONIC_SHA1SUM=e8631edc1775000d119b70fd40339a7238eece14; \ + export SUPERCRONIC_SHA1SUM=c98bbf82c5f648aaac8708c182cc83046fe48423; \ export 
SUPERCRONIC=supercronic-linux-amd64; \ ;; \ arm64) \ export SUPERCRONIC_URL=https://github.com/aptible/supercronic/releases/download/${SUPERCRONIC_VERSION}/supercronic-linux-arm64; \ - export SUPERCRONIC_SHA1SUM=4ab6343b52bf9da592e8b4bb7ae6eb5a8e21b71e; \ + export SUPERCRONIC_SHA1SUM=5ef4ccc3d43f12d0f6c3763758bc37cc4e5af76e; \ export SUPERCRONIC=supercronic-linux-arm64; \ ;; \ *) \ @@ -26,26 +26,25 @@ RUN set -ex && \ ;; \ esac && \ echo "Downloading supercronic for ${TARGETARCH} from ${SUPERCRONIC_URL}" && \ - # 添加重试机制和超时设置 - for i in 1 2 3 4 5; do \ - echo "Download attempt $i/5"; \ - if curl --fail --silent --show-error --location --retry 3 --retry-delay 2 --connect-timeout 30 --max-time 120 -o "$SUPERCRONIC" "$SUPERCRONIC_URL"; then \ - echo "Download successful"; \ - break; \ - else \ - echo "Download attempt $i failed, exit code: $?"; \ - if [ $i -eq 5 ]; then \ - echo "All download attempts failed"; \ - exit 1; \ - fi; \ - sleep $((i * 2)); \ - fi; \ + # 重试机制:最多3次,每次超时30秒 + for i in 1 2 3; do \ + echo "Download attempt $i/3"; \ + if curl -fsSL --connect-timeout 30 --max-time 60 -o "$SUPERCRONIC" "$SUPERCRONIC_URL"; then \ + echo "Download successful"; \ + break; \ + else \ + echo "Download attempt $i failed"; \ + if [ $i -eq 3 ]; then \ + echo "All download attempts failed"; \ + exit 1; \ + fi; \ + sleep 2; \ + fi; \ done && \ echo "${SUPERCRONIC_SHA1SUM} ${SUPERCRONIC}" | sha1sum -c - && \ chmod +x "$SUPERCRONIC" && \ mv "$SUPERCRONIC" "/usr/local/bin/${SUPERCRONIC}" && \ ln -s "/usr/local/bin/${SUPERCRONIC}" /usr/local/bin/supercronic && \ - # 验证安装 supercronic -version && \ apt-get remove -y curl && \ apt-get clean && \ diff --git a/docker/Dockerfile.mcp b/docker/Dockerfile.mcp new file mode 100644 index 0000000..6aacfb6 --- /dev/null +++ b/docker/Dockerfile.mcp @@ -0,0 +1,23 @@ +FROM python:3.10-slim + +WORKDIR /app + +# 安装依赖 +COPY requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt + +# 复制 MCP 服务器代码 +COPY mcp_server/ ./mcp_server/ + +# 创建必要目录 +RUN mkdir -p /app/config /app/output + +ENV PYTHONUNBUFFERED=1 \ + CONFIG_PATH=/app/config/config.yaml \ + FREQUENCY_WORDS_PATH=/app/config/frequency_words.txt + +# MCP HTTP 服务端口 +EXPOSE 3333 + +# 启动 MCP 服务器(HTTP 模式) +CMD ["python", "-m", "mcp_server.server", "--transport", "http", "--host", "0.0.0.0", "--port", "3333"] diff --git a/docker/docker-compose-build.yml b/docker/docker-compose-build.yml index 804e12d..955c4c9 100644 --- a/docker/docker-compose-build.yml +++ b/docker/docker-compose-build.yml @@ -6,6 +6,9 @@ services: container_name: trend-radar restart: unless-stopped + ports: + - "127.0.0.1:${WEBSERVER_PORT:-8080}:${WEBSERVER_PORT:-8080}" + volumes: - ../config:/app/config:ro - ../output:/app/output @@ -18,6 +21,12 @@ services: - REPORT_MODE=${REPORT_MODE:-} - SORT_BY_POSITION_FIRST=${SORT_BY_POSITION_FIRST:-} - MAX_NEWS_PER_KEYWORD=${MAX_NEWS_PER_KEYWORD:-} + - REVERSE_CONTENT_ORDER=${REVERSE_CONTENT_ORDER:-} + # Web 服务器 + - ENABLE_WEBSERVER=${ENABLE_WEBSERVER:-false} + - WEBSERVER_PORT=${WEBSERVER_PORT:-8080} + # 多账号配置 + - MAX_ACCOUNTS_PER_CHANNEL=${MAX_ACCOUNTS_PER_CHANNEL:-} # 推送时间窗口 - PUSH_WINDOW_ENABLED=${PUSH_WINDOW_ENABLED:-} - PUSH_WINDOW_START=${PUSH_WINDOW_START:-} @@ -49,3 +58,20 @@ services: - CRON_SCHEDULE=${CRON_SCHEDULE:-*/5 * * * *} - RUN_MODE=${RUN_MODE:-cron} - IMMEDIATE_RUN=${IMMEDIATE_RUN:-true} + + trend-radar-mcp: + build: + context: .. 
+ dockerfile: docker/Dockerfile.mcp + container_name: trend-radar-mcp + restart: unless-stopped + + ports: + - "127.0.0.1:3333:3333" + + volumes: + - ../config:/app/config:ro + - ../output:/app/output:ro + + environment: + - TZ=Asia/Shanghai diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 206d7d3..c7115b3 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -4,6 +4,9 @@ services: container_name: trend-radar restart: unless-stopped + ports: + - "127.0.0.1:${WEBSERVER_PORT:-8080}:${WEBSERVER_PORT:-8080}" + volumes: - ../config:/app/config:ro - ../output:/app/output @@ -16,6 +19,12 @@ services: - REPORT_MODE=${REPORT_MODE:-} - SORT_BY_POSITION_FIRST=${SORT_BY_POSITION_FIRST:-} - MAX_NEWS_PER_KEYWORD=${MAX_NEWS_PER_KEYWORD:-} + - REVERSE_CONTENT_ORDER=${REVERSE_CONTENT_ORDER:-} + # Web 服务器 + - ENABLE_WEBSERVER=${ENABLE_WEBSERVER:-false} + - WEBSERVER_PORT=${WEBSERVER_PORT:-8080} + # 多账号配置 + - MAX_ACCOUNTS_PER_CHANNEL=${MAX_ACCOUNTS_PER_CHANNEL:-} # 推送时间窗口 - PUSH_WINDOW_ENABLED=${PUSH_WINDOW_ENABLED:-} - PUSH_WINDOW_START=${PUSH_WINDOW_START:-} @@ -47,3 +56,18 @@ services: - CRON_SCHEDULE=${CRON_SCHEDULE:-*/5 * * * *} - RUN_MODE=${RUN_MODE:-cron} - IMMEDIATE_RUN=${IMMEDIATE_RUN:-true} + + trend-radar-mcp: + image: wantcat/trendradar-mcp:latest + container_name: trend-radar-mcp + restart: unless-stopped + + ports: + - "127.0.0.1:3333:3333" + + volumes: + - ../config:/app/config:ro + - ../output:/app/output:ro + + environment: + - TZ=Asia/Shanghai diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 01d489d..d33bb1c 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -33,9 +33,15 @@ case "${RUN_MODE:-cron}" in /usr/local/bin/python main.py fi + # 启动 Web 服务器(如果配置了) + if [ "${ENABLE_WEBSERVER:-false}" = "true" ]; then + echo "🌐 启动 Web 服务器..." 
+ /usr/local/bin/python manage.py start_webserver + fi + echo "⏰ 启动supercronic: ${CRON_SCHEDULE:-*/30 * * * *}" echo "🎯 supercronic 将作为 PID 1 运行" - + exec /usr/local/bin/supercronic -passthrough-logs /tmp/crontab ;; *) diff --git a/docker/manage.py b/docker/manage.py index e72d553..944cccf 100644 --- a/docker/manage.py +++ b/docker/manage.py @@ -8,8 +8,14 @@ import os import sys import subprocess import time +import signal from pathlib import Path +# Web 服务器配置 +WEBSERVER_PORT = int(os.environ.get("WEBSERVER_PORT", "8080")) +WEBSERVER_DIR = "/app/output" +WEBSERVER_PID_FILE = "/tmp/webserver.pid" + def run_command(cmd, shell=True, capture_output=True): """执行系统命令""" @@ -374,13 +380,13 @@ def restart_supercronic(): """重启supercronic进程""" print("🔄 重启supercronic...") print("⚠️ 注意: supercronic 是 PID 1,无法直接重启") - + # 检查当前 PID 1 try: with open('/proc/1/cmdline', 'r') as f: pid1_cmdline = f.read().replace('\x00', ' ').strip() print(f" 🔍 当前 PID 1: {pid1_cmdline}") - + if "supercronic" in pid1_cmdline.lower(): print(" ✅ PID 1 是 supercronic") print(" 💡 要重启 supercronic,需要重启整个容器:") @@ -394,29 +400,167 @@ def restart_supercronic(): print(" 💡 建议重启容器: docker restart trend-radar") +def start_webserver(): + """启动 Web 服务器托管 output 目录""" + print(f"🌐 启动 Web 服务器 (端口: {WEBSERVER_PORT})...") + print(f" 🔒 安全提示:仅提供静态文件访问,限制在 {WEBSERVER_DIR} 目录") + + # 检查是否已经运行 + if Path(WEBSERVER_PID_FILE).exists(): + try: + with open(WEBSERVER_PID_FILE, 'r') as f: + old_pid = int(f.read().strip()) + try: + os.kill(old_pid, 0) # 检查进程是否存在 + print(f" ⚠️ Web 服务器已在运行 (PID: {old_pid})") + print(f" 💡 访问: http://localhost:{WEBSERVER_PORT}") + print(" 💡 停止服务: python manage.py stop_webserver") + return + except OSError: + # 进程不存在,删除旧的 PID 文件 + os.remove(WEBSERVER_PID_FILE) + except Exception as e: + print(f" ⚠️ 清理旧的 PID 文件: {e}") + try: + os.remove(WEBSERVER_PID_FILE) + except: + pass + + # 检查目录是否存在 + if not Path(WEBSERVER_DIR).exists(): + print(f" ❌ 目录不存在: {WEBSERVER_DIR}") + return + + try: + # 启动 HTTP 服务器 + # 使用 
--bind 绑定到 0.0.0.0 使容器内部可访问 + # 工作目录限制在 WEBSERVER_DIR,防止访问其他目录 + process = subprocess.Popen( + [sys.executable, '-m', 'http.server', str(WEBSERVER_PORT), '--bind', '0.0.0.0'], + cwd=WEBSERVER_DIR, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + start_new_session=True + ) + + # 等待一下确保服务器启动 + time.sleep(1) + + # 检查进程是否还在运行 + if process.poll() is None: + # 保存 PID + with open(WEBSERVER_PID_FILE, 'w') as f: + f.write(str(process.pid)) + + print(f" ✅ Web 服务器已启动 (PID: {process.pid})") + print(f" 📁 服务目录: {WEBSERVER_DIR} (只读,仅静态文件)") + print(f" 🌐 访问地址: http://localhost:{WEBSERVER_PORT}") + print(f" 📄 首页: http://localhost:{WEBSERVER_PORT}/index.html") + print(" 💡 停止服务: python manage.py stop_webserver") + else: + print(f" ❌ Web 服务器启动失败") + except Exception as e: + print(f" ❌ 启动失败: {e}") + + +def stop_webserver(): + """停止 Web 服务器""" + print("🛑 停止 Web 服务器...") + + if not Path(WEBSERVER_PID_FILE).exists(): + print(" ℹ️ Web 服务器未运行") + return + + try: + with open(WEBSERVER_PID_FILE, 'r') as f: + pid = int(f.read().strip()) + + try: + # 尝试终止进程 + os.kill(pid, signal.SIGTERM) + time.sleep(0.5) + + # 检查进程是否已终止 + try: + os.kill(pid, 0) + # 进程还在,强制杀死 + os.kill(pid, signal.SIGKILL) + print(f" ⚠️ 强制停止 Web 服务器 (PID: {pid})") + except OSError: + print(f" ✅ Web 服务器已停止 (PID: {pid})") + except OSError as e: + if e.errno == 3: # No such process + print(f" ℹ️ 进程已不存在 (PID: {pid})") + else: + raise + + # 删除 PID 文件 + os.remove(WEBSERVER_PID_FILE) + except Exception as e: + print(f" ❌ 停止失败: {e}") + # 尝试清理 PID 文件 + try: + os.remove(WEBSERVER_PID_FILE) + except: + pass + + +def webserver_status(): + """查看 Web 服务器状态""" + print("🌐 Web 服务器状态:") + + if not Path(WEBSERVER_PID_FILE).exists(): + print(" ⭕ 未运行") + print(f" 💡 启动服务: python manage.py start_webserver") + return + + try: + with open(WEBSERVER_PID_FILE, 'r') as f: + pid = int(f.read().strip()) + + try: + os.kill(pid, 0) # 检查进程是否存在 + print(f" ✅ 运行中 (PID: {pid})") + print(f" 📁 服务目录: {WEBSERVER_DIR}") + print(f" 🌐 访问地址: 
http://localhost:{WEBSERVER_PORT}") + print(f" 📄 首页: http://localhost:{WEBSERVER_PORT}/index.html") + print(" 💡 停止服务: python manage.py stop_webserver") + except OSError: + print(f" ⭕ 未运行 (PID 文件存在但进程不存在)") + os.remove(WEBSERVER_PID_FILE) + print(" 💡 启动服务: python manage.py start_webserver") + except Exception as e: + print(f" ❌ 状态检查失败: {e}") + + def show_help(): """显示帮助信息""" help_text = """ 🐳 TrendRadar 容器管理工具 📋 命令列表: - run - 手动执行一次爬虫 - status - 显示容器运行状态 - config - 显示当前配置 - files - 显示输出文件 - logs - 实时查看日志 - restart - 重启说明 - help - 显示此帮助 + run - 手动执行一次爬虫 + status - 显示容器运行状态 + config - 显示当前配置 + files - 显示输出文件 + logs - 实时查看日志 + restart - 重启说明 + start_webserver - 启动 Web 服务器托管 output 目录 + stop_webserver - 停止 Web 服务器 + webserver_status - 查看 Web 服务器状态 + help - 显示此帮助 📖 使用示例: # 在容器中执行 python manage.py run python manage.py status python manage.py logs - + python manage.py start_webserver + # 在宿主机执行 docker exec -it trend-radar python manage.py run docker exec -it trend-radar python manage.py status + docker exec -it trend-radar python manage.py start_webserver docker logs trend-radar 💡 常用操作指南: @@ -424,18 +568,24 @@ def show_help(): - 查看 supercronic 是否为 PID 1 - 检查配置文件和关键文件 - 查看 cron 调度设置 - - 2. 手动执行测试: run + + 2. 手动执行测试: run - 立即执行一次新闻爬取 - 测试程序是否正常工作 - + 3. 查看日志: logs - 实时监控运行情况 - 也可使用: docker logs trend-radar - + 4. 重启服务: restart - 由于 supercronic 是 PID 1,需要重启整个容器 - 使用: docker restart trend-radar + + 5. 
Web 服务器管理: + - 启动: start_webserver + - 停止: stop_webserver + - 状态: webserver_status + - 访问: http://localhost:8080 """ print(help_text) @@ -453,6 +603,9 @@ def main(): "files": show_files, "logs": show_logs, "restart": restart_supercronic, + "start_webserver": start_webserver, + "stop_webserver": stop_webserver, + "webserver_status": webserver_status, "help": show_help, } diff --git a/main.py b/main.py index b90a519..df9f73a 100644 --- a/main.py +++ b/main.py @@ -20,7 +20,7 @@ import requests import yaml -VERSION = "3.4.1" +VERSION = "3.5.0" # === SMTP邮件配置 === @@ -55,6 +55,109 @@ SMTP_CONFIGS = { } +# === 多账号推送工具函数 === +def parse_multi_account_config(config_value: str, separator: str = ";") -> List[str]: + """ + 解析多账号配置,返回账号列表 + + Args: + config_value: 配置值字符串,多个账号用分隔符分隔 + separator: 分隔符,默认为 ; + + Returns: + 账号列表,空字符串会被保留(用于占位) + """ + if not config_value: + return [] + # 保留空字符串用于占位(如 ";token2" 表示第一个账号无token) + accounts = [acc.strip() for acc in config_value.split(separator)] + # 过滤掉全部为空的情况 + if all(not acc for acc in accounts): + return [] + return accounts + + +def validate_paired_configs( + configs: Dict[str, List[str]], + channel_name: str, + required_keys: Optional[List[str]] = None +) -> Tuple[bool, int]: + """ + 验证配对配置的数量是否一致 + + Args: + configs: 配置字典,key 为配置名,value 为账号列表 + channel_name: 渠道名称,用于日志输出 + required_keys: 必须有值的配置项列表 + + Returns: + (是否验证通过, 账号数量) + """ + # 过滤掉空列表 + non_empty_configs = {k: v for k, v in configs.items() if v} + + if not non_empty_configs: + return True, 0 + + # 检查必须项 + if required_keys: + for key in required_keys: + if key not in non_empty_configs or not non_empty_configs[key]: + return True, 0 # 必须项为空,视为未配置 + + # 获取所有非空配置的长度 + lengths = {k: len(v) for k, v in non_empty_configs.items()} + unique_lengths = set(lengths.values()) + + if len(unique_lengths) > 1: + print(f"❌ {channel_name} 配置错误:配对配置数量不一致,将跳过该渠道推送") + for key, length in lengths.items(): + print(f" - {key}: {length} 个") + return False, 0 + + return True, 
list(unique_lengths)[0] if unique_lengths else 0 + + +def limit_accounts( + accounts: List[str], + max_count: int, + channel_name: str +) -> List[str]: + """ + 限制账号数量 + + Args: + accounts: 账号列表 + max_count: 最大账号数量 + channel_name: 渠道名称,用于日志输出 + + Returns: + 限制后的账号列表 + """ + if len(accounts) > max_count: + print(f"⚠️ {channel_name} 配置了 {len(accounts)} 个账号,超过最大限制 {max_count},只使用前 {max_count} 个") + print(f" ⚠️ 警告:如果您是 fork 用户,过多账号可能导致 GitHub Actions 运行时间过长,存在账号风险") + return accounts[:max_count] + return accounts + + +def get_account_at_index(accounts: List[str], index: int, default: str = "") -> str: + """ + 安全获取指定索引的账号值 + + Args: + accounts: 账号列表 + index: 索引 + default: 默认值 + + Returns: + 账号值或默认值 + """ + if index < len(accounts): + return accounts[index] if accounts[index] else default + return default + + # === 配置管理 === def load_config(): """加载配置文件""" @@ -84,6 +187,10 @@ def load_config(): os.environ.get("MAX_NEWS_PER_KEYWORD", "").strip() or "0" ) or config_data["report"].get("max_news_per_keyword", 0), + "REVERSE_CONTENT_ORDER": os.environ.get("REVERSE_CONTENT_ORDER", "").strip().lower() + in ("true", "1") + if os.environ.get("REVERSE_CONTENT_ORDER", "").strip() + else config_data["report"].get("reverse_content_order", False), "USE_PROXY": config_data["crawler"]["use_proxy"], "DEFAULT_PROXY": config_data["crawler"]["default_proxy"], "ENABLE_CRAWLER": os.environ.get("ENABLE_CRAWLER", "").strip().lower() @@ -105,6 +212,11 @@ def load_config(): "FEISHU_MESSAGE_SEPARATOR": config_data["notification"][ "feishu_message_separator" ], + # 多账号配置 + "MAX_ACCOUNTS_PER_CHANNEL": int( + os.environ.get("MAX_ACCOUNTS_PER_CHANNEL", "").strip() or "0" + ) + or config_data["notification"].get("max_accounts_per_channel", 3), "PUSH_WINDOW": { "ENABLED": os.environ.get("PUSH_WINDOW_ENABLED", "").strip().lower() in ("true", "1") @@ -210,39 +322,73 @@ def load_config(): # 输出配置来源信息 notification_sources = [] + max_accounts = config["MAX_ACCOUNTS_PER_CHANNEL"] + if 
config["FEISHU_WEBHOOK_URL"]: + accounts = parse_multi_account_config(config["FEISHU_WEBHOOK_URL"]) + count = min(len(accounts), max_accounts) source = "环境变量" if os.environ.get("FEISHU_WEBHOOK_URL") else "配置文件" - notification_sources.append(f"飞书({source})") + notification_sources.append(f"飞书({source}, {count}个账号)") if config["DINGTALK_WEBHOOK_URL"]: + accounts = parse_multi_account_config(config["DINGTALK_WEBHOOK_URL"]) + count = min(len(accounts), max_accounts) source = "环境变量" if os.environ.get("DINGTALK_WEBHOOK_URL") else "配置文件" - notification_sources.append(f"钉钉({source})") + notification_sources.append(f"钉钉({source}, {count}个账号)") if config["WEWORK_WEBHOOK_URL"]: + accounts = parse_multi_account_config(config["WEWORK_WEBHOOK_URL"]) + count = min(len(accounts), max_accounts) source = "环境变量" if os.environ.get("WEWORK_WEBHOOK_URL") else "配置文件" - notification_sources.append(f"企业微信({source})") + notification_sources.append(f"企业微信({source}, {count}个账号)") if config["TELEGRAM_BOT_TOKEN"] and config["TELEGRAM_CHAT_ID"]: - token_source = ( - "环境变量" if os.environ.get("TELEGRAM_BOT_TOKEN") else "配置文件" + tokens = parse_multi_account_config(config["TELEGRAM_BOT_TOKEN"]) + chat_ids = parse_multi_account_config(config["TELEGRAM_CHAT_ID"]) + # 验证数量一致性 + valid, count = validate_paired_configs( + {"bot_token": tokens, "chat_id": chat_ids}, + "Telegram", + required_keys=["bot_token", "chat_id"] ) - chat_source = "环境变量" if os.environ.get("TELEGRAM_CHAT_ID") else "配置文件" - notification_sources.append(f"Telegram({token_source}/{chat_source})") + if valid and count > 0: + count = min(count, max_accounts) + token_source = "环境变量" if os.environ.get("TELEGRAM_BOT_TOKEN") else "配置文件" + notification_sources.append(f"Telegram({token_source}, {count}个账号)") if config["EMAIL_FROM"] and config["EMAIL_PASSWORD"] and config["EMAIL_TO"]: from_source = "环境变量" if os.environ.get("EMAIL_FROM") else "配置文件" notification_sources.append(f"邮件({from_source})") if config["NTFY_SERVER_URL"] and 
config["NTFY_TOPIC"]: - server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件" - notification_sources.append(f"ntfy({server_source})") + topics = parse_multi_account_config(config["NTFY_TOPIC"]) + tokens = parse_multi_account_config(config["NTFY_TOKEN"]) + # ntfy 的 token 是可选的,但如果配置了,数量必须与 topic 一致 + if tokens: + valid, count = validate_paired_configs( + {"topic": topics, "token": tokens}, + "ntfy" + ) + if valid and count > 0: + count = min(count, max_accounts) + server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件" + notification_sources.append(f"ntfy({server_source}, {count}个账号)") + else: + count = min(len(topics), max_accounts) + server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件" + notification_sources.append(f"ntfy({server_source}, {count}个账号)") if config["BARK_URL"]: + accounts = parse_multi_account_config(config["BARK_URL"]) + count = min(len(accounts), max_accounts) bark_source = "环境变量" if os.environ.get("BARK_URL") else "配置文件" - notification_sources.append(f"Bark({bark_source})") + notification_sources.append(f"Bark({bark_source}, {count}个账号)") if config["SLACK_WEBHOOK_URL"]: + accounts = parse_multi_account_config(config["SLACK_WEBHOOK_URL"]) + count = min(len(accounts), max_accounts) slack_source = "环境变量" if os.environ.get("SLACK_WEBHOOK_URL") else "配置文件" - notification_sources.append(f"Slack({slack_source})") + notification_sources.append(f"Slack({slack_source}, {count}个账号)") if notification_sources: print(f"通知渠道配置来源: {', '.join(notification_sources)}") + print(f"每个渠道最大账号数: {max_accounts}") else: print("未配置任何通知渠道") @@ -646,8 +792,13 @@ def save_titles_to_file(results: Dict, id_to_name: Dict, failed_ids: List) -> st def load_frequency_words( frequency_file: Optional[str] = None, -) -> Tuple[List[Dict], List[str]]: - """加载频率词配置""" +) -> Tuple[List[Dict], List[str], List[str]]: + """ + 加载频率词配置 + + Returns: + (词组列表, 词组内过滤词, 全局过滤词) + """ if frequency_file is None: frequency_file = os.environ.get( 
"FREQUENCY_WORDS_PATH", "config/frequency_words.txt" @@ -664,9 +815,37 @@ def load_frequency_words( processed_groups = [] filter_words = [] + global_filters = [] # 新增:全局过滤词列表 + + # 默认区域(向后兼容) + current_section = "WORD_GROUPS" for group in word_groups: - words = [word.strip() for word in group.split("\n") if word.strip()] + lines = [line.strip() for line in group.split("\n") if line.strip()] + + if not lines: + continue + + # 检查是否为区域标记 + if lines[0].startswith("[") and lines[0].endswith("]"): + section_name = lines[0][1:-1].upper() + if section_name in ("GLOBAL_FILTER", "WORD_GROUPS"): + current_section = section_name + lines = lines[1:] # 移除标记行 + + # 处理全局过滤区域 + if current_section == "GLOBAL_FILTER": + # 直接添加所有非空行到全局过滤列表 + for line in lines: + # 忽略特殊语法前缀,只提取纯文本 + if line.startswith(("!", "+", "@")): + continue # 全局过滤区不支持特殊语法 + if line: + global_filters.append(line) + continue + + # 处理词组区域(保持现有逻辑) + words = lines group_required_words = [] group_normal_words = [] @@ -705,7 +884,7 @@ def load_frequency_words( } ) - return processed_groups, filter_words + return processed_groups, filter_words, global_filters def parse_file_titles(file_path: Path) -> Tuple[Dict, Dict]: @@ -992,7 +1171,7 @@ def calculate_news_weight( def matches_word_groups( - title: str, word_groups: List[Dict], filter_words: List[str] + title: str, word_groups: List[Dict], filter_words: List[str], global_filters: Optional[List[str]] = None ) -> bool: """检查标题是否匹配词组规则""" # 防御性类型检查:确保 title 是有效字符串 @@ -1001,12 +1180,17 @@ def matches_word_groups( if not title.strip(): return False + title_lower = title.lower() + + # 全局过滤检查(优先级最高) + if global_filters: + if any(global_word.lower() in title_lower for global_word in global_filters): + return False + # 如果没有配置词组,则匹配所有标题(支持显示全部新闻) if not word_groups: return True - title_lower = title.lower() - # 过滤词检查 if any(filter_word.lower() in title_lower for filter_word in filter_words): return False @@ -1099,8 +1283,9 @@ def count_word_frequency( rank_threshold: int = 
CONFIG["RANK_THRESHOLD"], new_titles: Optional[Dict] = None, mode: str = "daily", + global_filters: Optional[List[str]] = None, ) -> Tuple[List[Dict], int]: - """统计词频,支持必须词、频率词、过滤词,并标记新增标题""" + """统计词频,支持必须词、频率词、过滤词、全局过滤词,并标记新增标题""" # 如果没有配置词组,创建一个包含所有新闻的虚拟词组 if not word_groups: @@ -1191,7 +1376,7 @@ def count_word_frequency( # 使用统一的匹配逻辑 matches_frequency_words = matches_word_groups( - title, word_groups, filter_words + title, word_groups, filter_words, global_filters ) if not matches_frequency_words: @@ -1443,11 +1628,11 @@ def prepare_report_data( if not hide_new_section: filtered_new_titles = {} if new_titles and id_to_name: - word_groups, filter_words = load_frequency_words() + word_groups, filter_words, global_filters = load_frequency_words() for source_id, titles_data in new_titles.items(): filtered_titles = {} for title, title_data in titles_data.items(): - if matches_word_groups(title, word_groups, filter_words): + if matches_word_groups(title, word_groups, filter_words, global_filters): filtered_titles[title] = title_data if filtered_titles: filtered_new_titles[source_id] = filtered_titles @@ -1742,8 +1927,15 @@ def generate_html_report( f.write(html_content) if is_daily_summary: - root_file_path = Path("index.html") - with open(root_file_path, "w", encoding="utf-8") as f: + # 生成到根目录(供 GitHub Pages 访问) + root_index_path = Path("index.html") + with open(root_index_path, "w", encoding="utf-8") as f: + f.write(html_content) + + # 同时生成到 output 目录(供 Docker Volume 挂载访问) + output_index_path = Path("output") / "index.html" + ensure_directory_exists("output") + with open(output_index_path, "w", encoding="utf-8") as f: f.write(html_content) return file_path @@ -2259,7 +2451,8 @@ def render_html_content( """ - # 处理主要统计数据 + # 生成热点词汇统计部分的HTML + stats_html = "" if report_data["stats"]: total_count = len(report_data["stats"]) @@ -2276,7 +2469,7 @@ def render_html_content( escaped_word = html_escape(stat["word"]) - html += f""" + stats_html += f"""
@@ -2291,7 +2484,7 @@ def render_html_content( is_new = title_data.get("is_new", False) new_class = "new" if is_new else "" - html += f""" + stats_html += f"""
{j}
@@ -2318,7 +2511,7 @@ def render_html_content( else: rank_text = f"{min_rank}-{max_rank}" - html += f'{rank_text}' + stats_html += f'{rank_text}' # 处理时间显示 time_display = title_data.get("time_display", "") @@ -2329,16 +2522,16 @@ def render_html_content( .replace("[", "") .replace("]", "") ) - html += ( + stats_html += ( f'{html_escape(simplified_time)}' ) # 处理出现次数 count_info = title_data.get("count", 1) if count_info > 1: - html += f'{count_info}次' + stats_html += f'{count_info}次' - html += """ + stats_html += """
""" @@ -2348,21 +2541,22 @@ def render_html_content( if link_url: escaped_url = html_escape(link_url) - html += f'{escaped_title}' + stats_html += f'{escaped_title}' else: - html += escaped_title + stats_html += escaped_title - html += """ + stats_html += """
""" - html += """ + stats_html += """
""" - # 处理新增新闻区域 + # 生成新增新闻区域的HTML + new_titles_html = "" if report_data["new_titles"]: - html += f""" + new_titles_html += f"""
本次新增热点 (共 {report_data['total_new_count']} 条)
""" @@ -2370,7 +2564,7 @@ def render_html_content( escaped_source = html_escape(source_data["source_name"]) titles_count = len(source_data["titles"]) - html += f""" + new_titles_html += f"""
{escaped_source} · {titles_count}条
""" @@ -2394,7 +2588,7 @@ def render_html_content( else: rank_text = "?" - html += f""" + new_titles_html += f"""
{idx}
{rank_text}
@@ -2407,21 +2601,29 @@ def render_html_content( if link_url: escaped_url = html_escape(link_url) - html += f'{escaped_title}' + new_titles_html += f'{escaped_title}' else: - html += escaped_title + new_titles_html += escaped_title - html += """ + new_titles_html += """
""" - html += """ + new_titles_html += """
""" - html += """ + new_titles_html += """ """ + # 根据配置决定内容顺序 + if CONFIG.get("REVERSE_CONTENT_ORDER", False): + # 新增热点在前,热点词汇统计在后 + html += new_titles_html + stats_html + else: + # 默认:热点词汇统计在前,新增热点在后 + html += stats_html + new_titles_html + html += """ @@ -2762,57 +2964,47 @@ def render_feishu_content( report_data: Dict, update_info: Optional[Dict] = None, mode: str = "daily" ) -> str: """渲染飞书内容""" - text_content = "" - + # 生成热点词汇统计部分 + stats_content = "" if report_data["stats"]: - text_content += f"📊 **热点词汇统计**\n\n" + stats_content += f"📊 **热点词汇统计**\n\n" - total_count = len(report_data["stats"]) + total_count = len(report_data["stats"]) - for i, stat in enumerate(report_data["stats"]): - word = stat["word"] - count = stat["count"] + for i, stat in enumerate(report_data["stats"]): + word = stat["word"] + count = stat["count"] - sequence_display = f"[{i + 1}/{total_count}]" + sequence_display = f"[{i + 1}/{total_count}]" - if count >= 10: - text_content += f"🔥 {sequence_display} **{word}** : {count} 条\n\n" - elif count >= 5: - text_content += f"📈 {sequence_display} **{word}** : {count} 条\n\n" - else: - text_content += f"📌 {sequence_display} **{word}** : {count} 条\n\n" + if count >= 10: + stats_content += f"🔥 {sequence_display} **{word}** : {count} 条\n\n" + elif count >= 5: + stats_content += f"📈 {sequence_display} **{word}** : {count} 条\n\n" + else: + stats_content += f"📌 {sequence_display} **{word}** : {count} 条\n\n" - for j, title_data in enumerate(stat["titles"], 1): - formatted_title = format_title_for_platform( - "feishu", title_data, show_source=True - ) - text_content += f" {j}. {formatted_title}\n" + for j, title_data in enumerate(stat["titles"], 1): + formatted_title = format_title_for_platform( + "feishu", title_data, show_source=True + ) + stats_content += f" {j}. 
{formatted_title}\n" - if j < len(stat["titles"]): - text_content += "\n" + if j < len(stat["titles"]): + stats_content += "\n" - if i < len(report_data["stats"]) - 1: - text_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n" - - if not text_content: - if mode == "incremental": - mode_text = "增量模式下暂无新增匹配的热点词汇" - elif mode == "current": - mode_text = "当前榜单模式下暂无匹配的热点词汇" - else: - mode_text = "暂无匹配的热点词汇" - text_content = f"📭 {mode_text}\n\n" + if i < len(report_data["stats"]) - 1: + stats_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n" + # 生成新增新闻部分 + new_titles_content = "" if report_data["new_titles"]: - if text_content and "暂无匹配" not in text_content: - text_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n" - - text_content += ( + new_titles_content += ( f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n" ) for source_data in report_data["new_titles"]: - text_content += ( + new_titles_content += ( f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n" ) @@ -2822,9 +3014,37 @@ def render_feishu_content( formatted_title = format_title_for_platform( "feishu", title_data_copy, show_source=False ) - text_content += f" {j}. {formatted_title}\n" + new_titles_content += f" {j}. 
{formatted_title}\n" - text_content += "\n" + new_titles_content += "\n" + + # 根据配置决定内容顺序 + text_content = "" + if CONFIG.get("REVERSE_CONTENT_ORDER", False): + # 新增热点在前,热点词汇统计在后 + if new_titles_content: + text_content += new_titles_content + if stats_content: + text_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n" + if stats_content: + text_content += stats_content + else: + # 默认:热点词汇统计在前,新增热点在后 + if stats_content: + text_content += stats_content + if new_titles_content: + text_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n" + if new_titles_content: + text_content += new_titles_content + + if not text_content: + if mode == "incremental": + mode_text = "增量模式下暂无新增匹配的热点词汇" + elif mode == "current": + mode_text = "当前榜单模式下暂无匹配的热点词汇" + else: + mode_text = "暂无匹配的热点词汇" + text_content = f"📭 {mode_text}\n\n" if report_data["failed_ids"]: if text_content and "暂无匹配" not in text_content: @@ -2849,21 +3069,21 @@ def render_dingtalk_content( report_data: Dict, update_info: Optional[Dict] = None, mode: str = "daily" ) -> str: """渲染钉钉内容""" - text_content = "" - total_titles = sum( len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0 ) now = get_beijing_time() - text_content += f"**总新闻数:** {total_titles}\n\n" - text_content += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n" - text_content += f"**类型:** 热点分析报告\n\n" - - text_content += "---\n\n" + # 头部信息 + header_content = f"**总新闻数:** {total_titles}\n\n" + header_content += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n" + header_content += f"**类型:** 热点分析报告\n\n" + header_content += "---\n\n" + # 生成热点词汇统计部分 + stats_content = "" if report_data["stats"]: - text_content += f"📊 **热点词汇统计**\n\n" + stats_content += f"📊 **热点词汇统计**\n\n" total_count = len(report_data["stats"]) @@ -2874,25 +3094,64 @@ def render_dingtalk_content( sequence_display = f"[{i + 1}/{total_count}]" if count >= 10: - text_content += f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n" + stats_content += f"🔥 {sequence_display} 
**{word}** : **{count}** 条\n\n" elif count >= 5: - text_content += f"📈 {sequence_display} **{word}** : **{count}** 条\n\n" + stats_content += f"📈 {sequence_display} **{word}** : **{count}** 条\n\n" else: - text_content += f"📌 {sequence_display} **{word}** : {count} 条\n\n" + stats_content += f"📌 {sequence_display} **{word}** : {count} 条\n\n" for j, title_data in enumerate(stat["titles"], 1): formatted_title = format_title_for_platform( "dingtalk", title_data, show_source=True ) - text_content += f" {j}. {formatted_title}\n" + stats_content += f" {j}. {formatted_title}\n" if j < len(stat["titles"]): - text_content += "\n" + stats_content += "\n" if i < len(report_data["stats"]) - 1: - text_content += f"\n---\n\n" + stats_content += f"\n---\n\n" - if not report_data["stats"]: + # 生成新增新闻部分 + new_titles_content = "" + if report_data["new_titles"]: + new_titles_content += ( + f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n" + ) + + for source_data in report_data["new_titles"]: + new_titles_content += f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n" + + for j, title_data in enumerate(source_data["titles"], 1): + title_data_copy = title_data.copy() + title_data_copy["is_new"] = False + formatted_title = format_title_for_platform( + "dingtalk", title_data_copy, show_source=False + ) + new_titles_content += f" {j}. 
{formatted_title}\n" + + new_titles_content += "\n" + + # 根据配置决定内容顺序 + text_content = header_content + if CONFIG.get("REVERSE_CONTENT_ORDER", False): + # 新增热点在前,热点词汇统计在后 + if new_titles_content: + text_content += new_titles_content + if stats_content: + text_content += f"\n---\n\n" + if stats_content: + text_content += stats_content + else: + # 默认:热点词汇统计在前,新增热点在后 + if stats_content: + text_content += stats_content + if new_titles_content: + text_content += f"\n---\n\n" + if new_titles_content: + text_content += new_titles_content + + if not stats_content and not new_titles_content: if mode == "incremental": mode_text = "增量模式下暂无新增匹配的热点词汇" elif mode == "current": @@ -2901,29 +3160,8 @@ def render_dingtalk_content( mode_text = "暂无匹配的热点词汇" text_content += f"📭 {mode_text}\n\n" - if report_data["new_titles"]: - if text_content and "暂无匹配" not in text_content: - text_content += f"\n---\n\n" - - text_content += ( - f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n" - ) - - for source_data in report_data["new_titles"]: - text_content += f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n" - - for j, title_data in enumerate(source_data["titles"], 1): - title_data_copy = title_data.copy() - title_data_copy["is_new"] = False - formatted_title = format_title_for_platform( - "dingtalk", title_data_copy, show_source=False - ) - text_content += f" {j}. 
{formatted_title}\n" - - text_content += "\n" - if report_data["failed_ids"]: - if text_content and "暂无匹配" not in text_content: + if "暂无匹配" not in text_content: text_content += f"\n---\n\n" text_content += "⚠️ **数据获取失败的平台:**\n\n" @@ -3124,8 +3362,12 @@ def split_content_into_batches( batches.append(final_content) return batches - # 处理热点词汇统计 - if report_data["stats"]: + # 定义处理热点词汇统计的函数 + def process_stats_section(current_batch, current_batch_has_content, batches): + """处理热点词汇统计""" + if not report_data["stats"]: + return current_batch, current_batch_has_content, batches + total_count = len(report_data["stats"]) # 添加统计标题 @@ -3333,8 +3575,14 @@ def split_content_into_batches( ): current_batch = test_content - # 处理新增新闻(同样确保来源标题+第一条新闻的原子性) - if report_data["new_titles"]: + return current_batch, current_batch_has_content, batches + + # 定义处理新增新闻的函数 + def process_new_titles_section(current_batch, current_batch_has_content, batches): + """处理新增新闻""" + if not report_data["new_titles"]: + return current_batch, current_batch_has_content, batches + new_header = "" if format_type in ("wework", "bark"): new_header = f"\n\n\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n" @@ -3476,6 +3724,26 @@ def split_content_into_batches( current_batch += "\n" + return current_batch, current_batch_has_content, batches + + # 根据配置决定处理顺序 + if CONFIG.get("REVERSE_CONTENT_ORDER", False): + # 新增热点在前,热点词汇统计在后 + current_batch, current_batch_has_content, batches = process_new_titles_section( + current_batch, current_batch_has_content, batches + ) + current_batch, current_batch_has_content, batches = process_stats_section( + current_batch, current_batch_has_content, batches + ) + else: + # 默认:热点词汇统计在前,新增热点在后 + current_batch, current_batch_has_content, batches = process_stats_section( + current_batch, current_batch_has_content, batches + ) + current_batch, current_batch_has_content, batches = process_new_titles_section( + current_batch, current_batch_has_content, batches + ) + if 
report_data["failed_ids"]: failed_header = "" if format_type == "wework": @@ -3541,8 +3809,9 @@ def send_to_notifications( mode: str = "daily", html_file_path: Optional[str] = None, ) -> Dict[str, bool]: - """发送数据到多个通知平台""" + """发送数据到多个通知平台(支持多账号)""" results = {} + max_accounts = CONFIG["MAX_ACCOUNTS_PER_CHANNEL"] if CONFIG["PUSH_WINDOW"]["ENABLED"]: push_manager = PushRecordManager() @@ -3565,90 +3834,133 @@ def send_to_notifications( report_data = prepare_report_data(stats, failed_ids, new_titles, id_to_name, mode) - feishu_url = CONFIG["FEISHU_WEBHOOK_URL"] - dingtalk_url = CONFIG["DINGTALK_WEBHOOK_URL"] - wework_url = CONFIG["WEWORK_WEBHOOK_URL"] - telegram_token = CONFIG["TELEGRAM_BOT_TOKEN"] - telegram_chat_id = CONFIG["TELEGRAM_CHAT_ID"] + update_info_to_send = update_info if CONFIG["SHOW_VERSION_UPDATE"] else None + + # 发送到飞书(多账号) + feishu_urls = parse_multi_account_config(CONFIG["FEISHU_WEBHOOK_URL"]) + if feishu_urls: + feishu_urls = limit_accounts(feishu_urls, max_accounts, "飞书") + feishu_results = [] + for i, url in enumerate(feishu_urls): + if url: # 跳过空值 + account_label = f"账号{i+1}" if len(feishu_urls) > 1 else "" + result = send_to_feishu( + url, report_data, report_type, update_info_to_send, proxy_url, mode, account_label + ) + feishu_results.append(result) + results["feishu"] = any(feishu_results) if feishu_results else False + + # 发送到钉钉(多账号) + dingtalk_urls = parse_multi_account_config(CONFIG["DINGTALK_WEBHOOK_URL"]) + if dingtalk_urls: + dingtalk_urls = limit_accounts(dingtalk_urls, max_accounts, "钉钉") + dingtalk_results = [] + for i, url in enumerate(dingtalk_urls): + if url: + account_label = f"账号{i+1}" if len(dingtalk_urls) > 1 else "" + result = send_to_dingtalk( + url, report_data, report_type, update_info_to_send, proxy_url, mode, account_label + ) + dingtalk_results.append(result) + results["dingtalk"] = any(dingtalk_results) if dingtalk_results else False + + # 发送到企业微信(多账号) + wework_urls = 
parse_multi_account_config(CONFIG["WEWORK_WEBHOOK_URL"]) + if wework_urls: + wework_urls = limit_accounts(wework_urls, max_accounts, "企业微信") + wework_results = [] + for i, url in enumerate(wework_urls): + if url: + account_label = f"账号{i+1}" if len(wework_urls) > 1 else "" + result = send_to_wework( + url, report_data, report_type, update_info_to_send, proxy_url, mode, account_label + ) + wework_results.append(result) + results["wework"] = any(wework_results) if wework_results else False + + # 发送到 Telegram(多账号,需验证配对) + telegram_tokens = parse_multi_account_config(CONFIG["TELEGRAM_BOT_TOKEN"]) + telegram_chat_ids = parse_multi_account_config(CONFIG["TELEGRAM_CHAT_ID"]) + if telegram_tokens and telegram_chat_ids: + valid, count = validate_paired_configs( + {"bot_token": telegram_tokens, "chat_id": telegram_chat_ids}, + "Telegram", + required_keys=["bot_token", "chat_id"] + ) + if valid and count > 0: + telegram_tokens = limit_accounts(telegram_tokens, max_accounts, "Telegram") + telegram_chat_ids = telegram_chat_ids[:len(telegram_tokens)] # 保持数量一致 + telegram_results = [] + for i in range(len(telegram_tokens)): + token = telegram_tokens[i] + chat_id = telegram_chat_ids[i] + if token and chat_id: + account_label = f"账号{i+1}" if len(telegram_tokens) > 1 else "" + result = send_to_telegram( + token, chat_id, report_data, report_type, + update_info_to_send, proxy_url, mode, account_label + ) + telegram_results.append(result) + results["telegram"] = any(telegram_results) if telegram_results else False + + # 发送到 ntfy(多账号,需验证配对) + ntfy_server_url = CONFIG["NTFY_SERVER_URL"] + ntfy_topics = parse_multi_account_config(CONFIG["NTFY_TOPIC"]) + ntfy_tokens = parse_multi_account_config(CONFIG["NTFY_TOKEN"]) + if ntfy_server_url and ntfy_topics: + # 验证 token 和 topic 数量一致(如果配置了 token) + if ntfy_tokens and len(ntfy_tokens) != len(ntfy_topics): + print(f"❌ ntfy 配置错误:topic 数量({len(ntfy_topics)})与 token 数量({len(ntfy_tokens)})不一致,跳过 ntfy 推送") + else: + ntfy_topics = 
limit_accounts(ntfy_topics, max_accounts, "ntfy") + if ntfy_tokens: + ntfy_tokens = ntfy_tokens[:len(ntfy_topics)] + ntfy_results = [] + for i, topic in enumerate(ntfy_topics): + if topic: + token = get_account_at_index(ntfy_tokens, i, "") if ntfy_tokens else "" + account_label = f"账号{i+1}" if len(ntfy_topics) > 1 else "" + result = send_to_ntfy( + ntfy_server_url, topic, token, report_data, report_type, + update_info_to_send, proxy_url, mode, account_label + ) + ntfy_results.append(result) + results["ntfy"] = any(ntfy_results) if ntfy_results else False + + # 发送到 Bark(多账号) + bark_urls = parse_multi_account_config(CONFIG["BARK_URL"]) + if bark_urls: + bark_urls = limit_accounts(bark_urls, max_accounts, "Bark") + bark_results = [] + for i, url in enumerate(bark_urls): + if url: + account_label = f"账号{i+1}" if len(bark_urls) > 1 else "" + result = send_to_bark( + url, report_data, report_type, update_info_to_send, proxy_url, mode, account_label + ) + bark_results.append(result) + results["bark"] = any(bark_results) if bark_results else False + + # 发送到 Slack(多账号) + slack_urls = parse_multi_account_config(CONFIG["SLACK_WEBHOOK_URL"]) + if slack_urls: + slack_urls = limit_accounts(slack_urls, max_accounts, "Slack") + slack_results = [] + for i, url in enumerate(slack_urls): + if url: + account_label = f"账号{i+1}" if len(slack_urls) > 1 else "" + result = send_to_slack( + url, report_data, report_type, update_info_to_send, proxy_url, mode, account_label + ) + slack_results.append(result) + results["slack"] = any(slack_results) if slack_results else False + + # 发送邮件(保持原有逻辑,已支持多收件人) email_from = CONFIG["EMAIL_FROM"] email_password = CONFIG["EMAIL_PASSWORD"] email_to = CONFIG["EMAIL_TO"] email_smtp_server = CONFIG.get("EMAIL_SMTP_SERVER", "") email_smtp_port = CONFIG.get("EMAIL_SMTP_PORT", "") - ntfy_server_url = CONFIG["NTFY_SERVER_URL"] - ntfy_topic = CONFIG["NTFY_TOPIC"] - ntfy_token = CONFIG.get("NTFY_TOKEN", "") - bark_url = CONFIG["BARK_URL"] - slack_webhook_url = 
CONFIG["SLACK_WEBHOOK_URL"] - - update_info_to_send = update_info if CONFIG["SHOW_VERSION_UPDATE"] else None - - # 发送到飞书 - if feishu_url: - results["feishu"] = send_to_feishu( - feishu_url, report_data, report_type, update_info_to_send, proxy_url, mode - ) - - # 发送到钉钉 - if dingtalk_url: - results["dingtalk"] = send_to_dingtalk( - dingtalk_url, report_data, report_type, update_info_to_send, proxy_url, mode - ) - - # 发送到企业微信 - if wework_url: - results["wework"] = send_to_wework( - wework_url, report_data, report_type, update_info_to_send, proxy_url, mode - ) - - # 发送到 Telegram - if telegram_token and telegram_chat_id: - results["telegram"] = send_to_telegram( - telegram_token, - telegram_chat_id, - report_data, - report_type, - update_info_to_send, - proxy_url, - mode, - ) - - # 发送到 ntfy - if ntfy_server_url and ntfy_topic: - results["ntfy"] = send_to_ntfy( - ntfy_server_url, - ntfy_topic, - ntfy_token, - report_data, - report_type, - update_info_to_send, - proxy_url, - mode, - ) - - # 发送到 Bark - if bark_url: - results["bark"] = send_to_bark( - bark_url, - report_data, - report_type, - update_info_to_send, - proxy_url, - mode, - ) - - # 发送到 Slack - if slack_webhook_url: - results["slack"] = send_to_slack( - slack_webhook_url, - report_data, - report_type, - update_info_to_send, - proxy_url, - mode, - ) - - # 发送邮件 if email_from and email_password and email_to: results["email"] = send_to_email( email_from, @@ -3682,6 +3994,7 @@ def send_to_feishu( update_info: Optional[Dict] = None, proxy_url: Optional[str] = None, mode: str = "daily", + account_label: str = "", ) -> bool: """发送到飞书(支持分批发送)""" headers = {"Content-Type": "application/json"} @@ -3689,6 +4002,9 @@ def send_to_feishu( if proxy_url: proxies = {"http": proxy_url, "https": proxy_url} + # 日志前缀 + log_prefix = f"飞书{account_label}" if account_label else "飞书" + # 获取分批内容,使用飞书专用的批次大小 feishu_batch_size = CONFIG.get("FEISHU_BATCH_SIZE", 29000) # 预留批次头部空间,避免添加头部后超限 @@ -3704,13 +4020,13 @@ def send_to_feishu( # 
统一添加批次头部(已预留空间,不会超限) batches = add_batch_headers(batches, "feishu", feishu_batch_size) - print(f"飞书消息分为 {len(batches)} 批次发送 [{report_type}]") + print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]") # 逐批发送 for i, batch_content in enumerate(batches, 1): batch_size = len(batch_content.encode("utf-8")) print( - f"发送飞书第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]" + f"发送{log_prefix}第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]" ) total_titles = sum( @@ -3736,26 +4052,26 @@ def send_to_feishu( result = response.json() # 检查飞书的响应状态 if result.get("StatusCode") == 0 or result.get("code") == 0: - print(f"飞书第 {i}/{len(batches)} 批次发送成功 [{report_type}]") + print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]") # 批次间间隔 if i < len(batches): time.sleep(CONFIG["BATCH_SEND_INTERVAL"]) else: error_msg = result.get("msg") or result.get("StatusMessage", "未知错误") print( - f"飞书第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{error_msg}" + f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{error_msg}" ) return False else: print( - f"飞书第 {i}/{len(batches)} 批次发送失败 [{report_type}],状态码:{response.status_code}" + f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}],状态码:{response.status_code}" ) return False except Exception as e: - print(f"飞书第 {i}/{len(batches)} 批次发送出错 [{report_type}]:{e}") + print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]:{e}") return False - print(f"飞书所有 {len(batches)} 批次发送完成 [{report_type}]") + print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]") return True @@ -3766,6 +4082,7 @@ def send_to_dingtalk( update_info: Optional[Dict] = None, proxy_url: Optional[str] = None, mode: str = "daily", + account_label: str = "", ) -> bool: """发送到钉钉(支持分批发送)""" headers = {"Content-Type": "application/json"} @@ -3773,6 +4090,9 @@ def send_to_dingtalk( if proxy_url: proxies = {"http": proxy_url, "https": proxy_url} + # 日志前缀 + log_prefix = f"钉钉{account_label}" if account_label else "钉钉" + # 获取分批内容,使用钉钉专用的批次大小 
dingtalk_batch_size = CONFIG.get("DINGTALK_BATCH_SIZE", 20000) # 预留批次头部空间,避免添加头部后超限 @@ -3788,13 +4108,13 @@ def send_to_dingtalk( # 统一添加批次头部(已预留空间,不会超限) batches = add_batch_headers(batches, "dingtalk", dingtalk_batch_size) - print(f"钉钉消息分为 {len(batches)} 批次发送 [{report_type}]") + print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]") # 逐批发送 for i, batch_content in enumerate(batches, 1): batch_size = len(batch_content.encode("utf-8")) print( - f"发送钉钉第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]" + f"发送{log_prefix}第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]" ) payload = { @@ -3812,25 +4132,25 @@ def send_to_dingtalk( if response.status_code == 200: result = response.json() if result.get("errcode") == 0: - print(f"钉钉第 {i}/{len(batches)} 批次发送成功 [{report_type}]") + print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]") # 批次间间隔 if i < len(batches): time.sleep(CONFIG["BATCH_SEND_INTERVAL"]) else: print( - f"钉钉第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{result.get('errmsg')}" + f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{result.get('errmsg')}" ) return False else: print( - f"钉钉第 {i}/{len(batches)} 批次发送失败 [{report_type}],状态码:{response.status_code}" + f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}],状态码:{response.status_code}" ) return False except Exception as e: - print(f"钉钉第 {i}/{len(batches)} 批次发送出错 [{report_type}]:{e}") + print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]:{e}") return False - print(f"钉钉所有 {len(batches)} 批次发送完成 [{report_type}]") + print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]") return True @@ -3885,6 +4205,7 @@ def send_to_wework( update_info: Optional[Dict] = None, proxy_url: Optional[str] = None, mode: str = "daily", + account_label: str = "", ) -> bool: """发送到企业微信(支持分批发送,支持 markdown 和 text 两种格式)""" headers = {"Content-Type": "application/json"} @@ -3892,14 +4213,17 @@ def send_to_wework( if proxy_url: proxies = {"http": proxy_url, "https": 
proxy_url} + # 日志前缀 + log_prefix = f"企业微信{account_label}" if account_label else "企业微信" + # 获取消息类型配置(markdown 或 text) msg_type = CONFIG.get("WEWORK_MSG_TYPE", "markdown").lower() is_text_mode = msg_type == "text" if is_text_mode: - print(f"企业微信使用 text 格式(个人微信模式)[{report_type}]") + print(f"{log_prefix}使用 text 格式(个人微信模式)[{report_type}]") else: - print(f"企业微信使用 markdown 格式(群机器人模式)[{report_type}]") + print(f"{log_prefix}使用 markdown 格式(群机器人模式)[{report_type}]") # text 模式使用 wework_text,markdown 模式使用 wework header_format_type = "wework_text" if is_text_mode else "wework" @@ -3914,7 +4238,7 @@ def send_to_wework( # 统一添加批次头部(已预留空间,不会超限) batches = add_batch_headers(batches, header_format_type, wework_batch_size) - print(f"企业微信消息分为 {len(batches)} 批次发送 [{report_type}]") + print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]") # 逐批发送 for i, batch_content in enumerate(batches, 1): @@ -3930,7 +4254,7 @@ def send_to_wework( batch_size = len(batch_content.encode("utf-8")) print( - f"发送企业微信第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]" + f"发送{log_prefix}第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]" ) try: @@ -3940,25 +4264,25 @@ def send_to_wework( if response.status_code == 200: result = response.json() if result.get("errcode") == 0: - print(f"企业微信第 {i}/{len(batches)} 批次发送成功 [{report_type}]") + print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]") # 批次间间隔 if i < len(batches): time.sleep(CONFIG["BATCH_SEND_INTERVAL"]) else: print( - f"企业微信第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{result.get('errmsg')}" + f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{result.get('errmsg')}" ) return False else: print( - f"企业微信第 {i}/{len(batches)} 批次发送失败 [{report_type}],状态码:{response.status_code}" + f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}],状态码:{response.status_code}" ) return False except Exception as e: - print(f"企业微信第 {i}/{len(batches)} 批次发送出错 [{report_type}]:{e}") + print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 
[{report_type}]:{e}") return False - print(f"企业微信所有 {len(batches)} 批次发送完成 [{report_type}]") + print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]") return True @@ -3970,6 +4294,7 @@ def send_to_telegram( update_info: Optional[Dict] = None, proxy_url: Optional[str] = None, mode: str = "daily", + account_label: str = "", ) -> bool: """发送到Telegram(支持分批发送)""" headers = {"Content-Type": "application/json"} @@ -3979,6 +4304,9 @@ def send_to_telegram( if proxy_url: proxies = {"http": proxy_url, "https": proxy_url} + # 日志前缀 + log_prefix = f"Telegram{account_label}" if account_label else "Telegram" + # 获取分批内容,预留批次头部空间 telegram_batch_size = CONFIG.get("MESSAGE_BATCH_SIZE", 4000) header_reserve = _get_max_batch_header_size("telegram") @@ -3989,13 +4317,13 @@ def send_to_telegram( # 统一添加批次头部(已预留空间,不会超限) batches = add_batch_headers(batches, "telegram", telegram_batch_size) - print(f"Telegram消息分为 {len(batches)} 批次发送 [{report_type}]") + print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]") # 逐批发送 for i, batch_content in enumerate(batches, 1): batch_size = len(batch_content.encode("utf-8")) print( - f"发送Telegram第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]" + f"发送{log_prefix}第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]" ) payload = { @@ -4012,25 +4340,25 @@ def send_to_telegram( if response.status_code == 200: result = response.json() if result.get("ok"): - print(f"Telegram第 {i}/{len(batches)} 批次发送成功 [{report_type}]") + print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]") # 批次间间隔 if i < len(batches): time.sleep(CONFIG["BATCH_SEND_INTERVAL"]) else: print( - f"Telegram第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{result.get('description')}" + f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{result.get('description')}" ) return False else: print( - f"Telegram第 {i}/{len(batches)} 批次发送失败 [{report_type}],状态码:{response.status_code}" + f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 
[{report_type}],状态码:{response.status_code}" ) return False except Exception as e: - print(f"Telegram第 {i}/{len(batches)} 批次发送出错 [{report_type}]:{e}") + print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]:{e}") return False - print(f"Telegram所有 {len(batches)} 批次发送完成 [{report_type}]") + print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]") return True @@ -4183,8 +4511,12 @@ def send_to_ntfy( update_info: Optional[Dict] = None, proxy_url: Optional[str] = None, mode: str = "daily", + account_label: str = "", ) -> bool: """发送到ntfy(支持分批发送,严格遵守4KB限制)""" + # 日志前缀 + log_prefix = f"ntfy{account_label}" if account_label else "ntfy" + # 避免 HTTP header 编码问题 report_type_en_map = { "当日汇总": "Daily Summary", @@ -4227,13 +4559,13 @@ def send_to_ntfy( batches = add_batch_headers(batches, "ntfy", ntfy_batch_size) total_batches = len(batches) - print(f"ntfy消息分为 {total_batches} 批次发送 [{report_type}]") + print(f"{log_prefix}消息分为 {total_batches} 批次发送 [{report_type}]") # 反转批次顺序,使得在ntfy客户端显示时顺序正确 # ntfy显示最新消息在上面,所以我们从最后一批开始推送 reversed_batches = list(reversed(batches)) - print(f"ntfy将按反向顺序推送(最后批次先推送),确保客户端显示顺序正确") + print(f"{log_prefix}将按反向顺序推送(最后批次先推送),确保客户端显示顺序正确") # 逐批发送(反向顺序) success_count = 0 @@ -4243,12 +4575,12 @@ def send_to_ntfy( batch_size = len(batch_content.encode("utf-8")) print( - f"发送ntfy第 {actual_batch_num}/{total_batches} 批次(推送顺序: {idx}/{total_batches}),大小:{batch_size} 字节 [{report_type}]" + f"发送{log_prefix}第 {actual_batch_num}/{total_batches} 批次(推送顺序: {idx}/{total_batches}),大小:{batch_size} 字节 [{report_type}]" ) # 检查消息大小,确保不超过4KB if batch_size > 4096: - print(f"警告:ntfy第 {actual_batch_num} 批次消息过大({batch_size} 字节),可能被拒绝") + print(f"警告:{log_prefix}第 {actual_batch_num} 批次消息过大({batch_size} 字节),可能被拒绝") # 更新 headers 的批次标识 current_headers = headers.copy() @@ -4267,7 +4599,7 @@ def send_to_ntfy( ) if response.status_code == 200: - print(f"ntfy第 {actual_batch_num}/{total_batches} 批次发送成功 [{report_type}]") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 
批次发送成功 [{report_type}]") success_count += 1 if idx < total_batches: # 公共服务器建议 2-3 秒,自托管可以更短 @@ -4275,7 +4607,7 @@ def send_to_ntfy( time.sleep(interval) elif response.status_code == 429: print( - f"ntfy第 {actual_batch_num}/{total_batches} 批次速率限制 [{report_type}],等待后重试" + f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次速率限制 [{report_type}],等待后重试" ) time.sleep(10) # 等待10秒后重试 # 重试一次 @@ -4287,19 +4619,19 @@ def send_to_ntfy( timeout=30, ) if retry_response.status_code == 200: - print(f"ntfy第 {actual_batch_num}/{total_batches} 批次重试成功 [{report_type}]") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次重试成功 [{report_type}]") success_count += 1 else: print( - f"ntfy第 {actual_batch_num}/{total_batches} 批次重试失败,状态码:{retry_response.status_code}" + f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次重试失败,状态码:{retry_response.status_code}" ) elif response.status_code == 413: print( - f"ntfy第 {actual_batch_num}/{total_batches} 批次消息过大被拒绝 [{report_type}],消息大小:{batch_size} 字节" + f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次消息过大被拒绝 [{report_type}],消息大小:{batch_size} 字节" ) else: print( - f"ntfy第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}],状态码:{response.status_code}" + f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}],状态码:{response.status_code}" ) try: print(f"错误详情:{response.text}") @@ -4307,23 +4639,23 @@ def send_to_ntfy( pass except requests.exceptions.ConnectTimeout: - print(f"ntfy第 {actual_batch_num}/{total_batches} 批次连接超时 [{report_type}]") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接超时 [{report_type}]") except requests.exceptions.ReadTimeout: - print(f"ntfy第 {actual_batch_num}/{total_batches} 批次读取超时 [{report_type}]") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次读取超时 [{report_type}]") except requests.exceptions.ConnectionError as e: - print(f"ntfy第 {actual_batch_num}/{total_batches} 批次连接错误 [{report_type}]:{e}") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接错误 
[{report_type}]:{e}") except Exception as e: - print(f"ntfy第 {actual_batch_num}/{total_batches} 批次发送异常 [{report_type}]:{e}") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送异常 [{report_type}]:{e}") # 判断整体发送是否成功 if success_count == total_batches: - print(f"ntfy所有 {total_batches} 批次发送完成 [{report_type}]") + print(f"{log_prefix}所有 {total_batches} 批次发送完成 [{report_type}]") return True elif success_count > 0: - print(f"ntfy部分发送成功:{success_count}/{total_batches} 批次 [{report_type}]") + print(f"{log_prefix}部分发送成功:{success_count}/{total_batches} 批次 [{report_type}]") return True # 部分成功也视为成功 else: - print(f"ntfy发送完全失败 [{report_type}]") + print(f"{log_prefix}发送完全失败 [{report_type}]") return False @@ -4334,8 +4666,12 @@ def send_to_bark( update_info: Optional[Dict] = None, proxy_url: Optional[str] = None, mode: str = "daily", + account_label: str = "", ) -> bool: """发送到Bark(支持分批发送,使用 markdown 格式)""" + # 日志前缀 + log_prefix = f"Bark{account_label}" if account_label else "Bark" + proxies = None if proxy_url: proxies = {"http": proxy_url, "https": proxy_url} @@ -4348,7 +4684,7 @@ def send_to_bark( device_key = parsed_url.path.strip('/').split('/')[0] if parsed_url.path else None if not device_key: - print(f"Bark URL 格式错误,无法提取 device_key: {bark_url}") + print(f"{log_prefix} URL 格式错误,无法提取 device_key: {bark_url}") return False # 构建正确的 API 端点 @@ -4365,13 +4701,13 @@ def send_to_bark( batches = add_batch_headers(batches, "bark", bark_batch_size) total_batches = len(batches) - print(f"Bark消息分为 {total_batches} 批次发送 [{report_type}]") + print(f"{log_prefix}消息分为 {total_batches} 批次发送 [{report_type}]") # 反转批次顺序,使得在Bark客户端显示时顺序正确 # Bark显示最新消息在上面,所以我们从最后一批开始推送 reversed_batches = list(reversed(batches)) - print(f"Bark将按反向顺序推送(最后批次先推送),确保客户端显示顺序正确") + print(f"{log_prefix}将按反向顺序推送(最后批次先推送),确保客户端显示顺序正确") # 逐批发送(反向顺序) success_count = 0 @@ -4381,13 +4717,13 @@ def send_to_bark( batch_size = len(batch_content.encode("utf-8")) print( - f"发送Bark第 {actual_batch_num}/{total_batches} 批次(推送顺序: 
{idx}/{total_batches}),大小:{batch_size} 字节 [{report_type}]" + f"发送{log_prefix}第 {actual_batch_num}/{total_batches} 批次(推送顺序: {idx}/{total_batches}),大小:{batch_size} 字节 [{report_type}]" ) # 检查消息大小(Bark使用APNs,限制4KB) if batch_size > 4096: print( - f"警告:Bark第 {actual_batch_num}/{total_batches} 批次消息过大({batch_size} 字节),可能被拒绝" + f"警告:{log_prefix}第 {actual_batch_num}/{total_batches} 批次消息过大({batch_size} 字节),可能被拒绝" ) # 构建JSON payload @@ -4411,18 +4747,18 @@ def send_to_bark( if response.status_code == 200: result = response.json() if result.get("code") == 200: - print(f"Bark第 {actual_batch_num}/{total_batches} 批次发送成功 [{report_type}]") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送成功 [{report_type}]") success_count += 1 # 批次间间隔 if idx < total_batches: time.sleep(CONFIG["BATCH_SEND_INTERVAL"]) else: print( - f"Bark第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}],错误:{result.get('message', '未知错误')}" + f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}],错误:{result.get('message', '未知错误')}" ) else: print( - f"Bark第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}],状态码:{response.status_code}" + f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}],状态码:{response.status_code}" ) try: print(f"错误详情:{response.text}") @@ -4430,23 +4766,23 @@ def send_to_bark( pass except requests.exceptions.ConnectTimeout: - print(f"Bark第 {actual_batch_num}/{total_batches} 批次连接超时 [{report_type}]") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接超时 [{report_type}]") except requests.exceptions.ReadTimeout: - print(f"Bark第 {actual_batch_num}/{total_batches} 批次读取超时 [{report_type}]") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次读取超时 [{report_type}]") except requests.exceptions.ConnectionError as e: - print(f"Bark第 {actual_batch_num}/{total_batches} 批次连接错误 [{report_type}]:{e}") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接错误 [{report_type}]:{e}") except Exception as e: - print(f"Bark第 
{actual_batch_num}/{total_batches} 批次发送异常 [{report_type}]:{e}") + print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送异常 [{report_type}]:{e}") # 判断整体发送是否成功 if success_count == total_batches: - print(f"Bark所有 {total_batches} 批次发送完成 [{report_type}]") + print(f"{log_prefix}所有 {total_batches} 批次发送完成 [{report_type}]") return True elif success_count > 0: - print(f"Bark部分发送成功:{success_count}/{total_batches} 批次 [{report_type}]") + print(f"{log_prefix}部分发送成功:{success_count}/{total_batches} 批次 [{report_type}]") return True # 部分成功也视为成功 else: - print(f"Bark发送完全失败 [{report_type}]") + print(f"{log_prefix}发送完全失败 [{report_type}]") return False @@ -4475,6 +4811,7 @@ def send_to_slack( update_info: Optional[Dict] = None, proxy_url: Optional[str] = None, mode: str = "daily", + account_label: str = "", ) -> bool: """发送到Slack(支持分批发送,使用 mrkdwn 格式)""" headers = {"Content-Type": "application/json"} @@ -4482,6 +4819,9 @@ def send_to_slack( if proxy_url: proxies = {"http": proxy_url, "https": proxy_url} + # 日志前缀 + log_prefix = f"Slack{account_label}" if account_label else "Slack" + # 获取分批内容(使用 Slack 批次大小),预留批次头部空间 slack_batch_size = CONFIG["SLACK_BATCH_SIZE"] header_reserve = _get_max_batch_header_size("slack") @@ -4492,7 +4832,7 @@ def send_to_slack( # 统一添加批次头部(已预留空间,不会超限) batches = add_batch_headers(batches, "slack", slack_batch_size) - print(f"Slack消息分为 {len(batches)} 批次发送 [{report_type}]") + print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]") # 逐批发送 for i, batch_content in enumerate(batches, 1): @@ -4501,7 +4841,7 @@ def send_to_slack( batch_size = len(mrkdwn_content.encode("utf-8")) print( - f"发送Slack第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]" + f"发送{log_prefix}第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]" ) # 构建 Slack payload(使用简单的 text 字段,支持 mrkdwn) @@ -4516,21 +4856,21 @@ def send_to_slack( # Slack Incoming Webhooks 成功时返回 "ok" 文本 if response.status_code == 200 and response.text == "ok": - print(f"Slack第 {i}/{len(batches)} 批次发送成功 
[{report_type}]") + print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]") # 批次间间隔 if i < len(batches): time.sleep(CONFIG["BATCH_SEND_INTERVAL"]) else: error_msg = response.text if response.text else f"状态码:{response.status_code}" print( - f"Slack第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{error_msg}" + f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{error_msg}" ) return False except Exception as e: - print(f"Slack第 {i}/{len(batches)} 批次发送出错 [{report_type}]:{e}") + print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]:{e}") return False - print(f"Slack所有 {len(batches)} 批次发送完成 [{report_type}]") + print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]") return True @@ -4690,7 +5030,7 @@ class NewsAnalyzer: print(f"读取到 {total_titles} 个标题(已按当前监控平台过滤)") new_titles = detect_latest_new_titles(current_platform_ids) - word_groups, filter_words = load_frequency_words() + word_groups, filter_words, global_filters = load_frequency_words() return ( all_results, @@ -4699,6 +5039,7 @@ class NewsAnalyzer: new_titles, word_groups, filter_words, + global_filters, ) except Exception as e: print(f"数据加载失败: {e}") @@ -4735,6 +5076,7 @@ class NewsAnalyzer: id_to_name: Dict, failed_ids: Optional[List] = None, is_daily_summary: bool = False, + global_filters: Optional[List[str]] = None, ) -> Tuple[List[Dict], str]: """统一的分析流水线:数据处理 → 统计计算 → HTML生成""" @@ -4748,6 +5090,7 @@ class NewsAnalyzer: self.rank_threshold, new_titles, mode=mode, + global_filters=global_filters, ) # HTML生成 @@ -4827,7 +5170,7 @@ class NewsAnalyzer: if not analysis_data: return None - all_results, id_to_name, title_info, new_titles, word_groups, filter_words = ( + all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters = ( analysis_data ) @@ -4841,6 +5184,7 @@ class NewsAnalyzer: filter_words, id_to_name, is_daily_summary=True, + global_filters=global_filters, ) print(f"{summary_type}报告已生成: {html_file}") @@ -4868,7 +5212,7 @@ class 
NewsAnalyzer: if not analysis_data: return None - all_results, id_to_name, title_info, new_titles, word_groups, filter_words = ( + all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters = ( analysis_data ) @@ -4882,6 +5226,7 @@ class NewsAnalyzer: filter_words, id_to_name, is_daily_summary=True, + global_filters=global_filters, ) print(f"{summary_type}HTML已生成: {html_file}") @@ -4941,7 +5286,7 @@ class NewsAnalyzer: new_titles = detect_latest_new_titles(current_platform_ids) time_info = Path(save_titles_to_file(results, id_to_name, failed_ids)).stem - word_groups, filter_words = load_frequency_words() + word_groups, filter_words, global_filters = load_frequency_words() # current模式下,实时推送需要使用完整的历史数据来保证统计信息的完整性 if self.report_mode == "current": @@ -4955,6 +5300,7 @@ class NewsAnalyzer: historical_new_titles, _, _, + _, ) = analysis_data print( @@ -4970,6 +5316,7 @@ class NewsAnalyzer: filter_words, historical_id_to_name, failed_ids=failed_ids, + global_filters=global_filters, ) combined_id_to_name = {**historical_id_to_name, **id_to_name} @@ -5002,6 +5349,7 @@ class NewsAnalyzer: filter_words, id_to_name, failed_ids=failed_ids, + global_filters=global_filters, ) print(f"HTML报告已生成: {html_file}") diff --git a/version b/version index 8cf6caf..e5b8203 100644 --- a/version +++ b/version @@ -1 +1 @@ -3.4.1 \ No newline at end of file +3.5.0 \ No newline at end of file