v4.0.0 大大大更新

This commit is contained in:
sansan
2025-12-13 13:44:35 +08:00
parent 97c05aa33c
commit c7bacdfff7
61 changed files with 12407 additions and 5889 deletions
+33 -2
View File
@@ -40,8 +40,6 @@ PUSH_WINDOW_START=
PUSH_WINDOW_END=
# 每天只推送一次 (true/false)
PUSH_WINDOW_ONCE_PER_DAY=
# 推送记录保留天数 (数字,如 7)
PUSH_WINDOW_RETENTION_DAYS=
# ============================================
# 多账号配置
@@ -87,6 +85,39 @@ BARK_URL=
# Slack 推送配置(多账号用 ; 分隔)
SLACK_WEBHOOK_URL=
# ============================================
# 存储配置
# ============================================
# 存储后端选择 (local/remote/auto)
# - local: 本地 SQLite + TXT/HTML 文件
# - remote: 远程云存储(S3 兼容协议)
# - auto: 自动选择(GitHub Actions 用 remote,其他用 local)
STORAGE_BACKEND=auto
# 本地数据保留天数(0 = 无限制,不清理历史数据)
LOCAL_RETENTION_DAYS=0
# 远程数据保留天数(0 = 无限制,不清理历史数据)
REMOTE_RETENTION_DAYS=0
# 是否生成 TXT 快照 (true/false)
STORAGE_TXT_ENABLED=
# 是否生成 HTML 报告 (true/false)
STORAGE_HTML_ENABLED=
# 远程存储配置(S3 兼容协议,支持 R2/OSS/COS/S3 等)
S3_ENDPOINT_URL=
S3_BUCKET_NAME=
S3_ACCESS_KEY_ID=
S3_SECRET_ACCESS_KEY=
S3_REGION=
# 数据拉取配置(从远程同步到本地)
PULL_ENABLED=false
PULL_DAYS=7
# ============================================
# 运行配置
# ============================================
+1 -1
View File
@@ -53,8 +53,8 @@ RUN set -ex && \
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY main.py .
COPY docker/manage.py .
COPY trendradar/ ./trendradar/
# 复制 entrypoint.sh 并强制转换为 LF 格式
COPY docker/entrypoint.sh /entrypoint.sh.tmp
+2
View File
@@ -8,6 +8,8 @@ RUN pip install --no-cache-dir -r requirements.txt
# 复制 MCP 服务器代码
COPY mcp_server/ ./mcp_server/
# 复制 trendradar 模块(MCP 服务需要读取 SQLite 数据)
COPY trendradar/ ./trendradar/
# 创建必要目录
RUN mkdir -p /app/config /app/output
+16 -2
View File
@@ -32,7 +32,6 @@ services:
- PUSH_WINDOW_START=${PUSH_WINDOW_START:-}
- PUSH_WINDOW_END=${PUSH_WINDOW_END:-}
- PUSH_WINDOW_ONCE_PER_DAY=${PUSH_WINDOW_ONCE_PER_DAY:-}
- PUSH_WINDOW_RETENTION_DAYS=${PUSH_WINDOW_RETENTION_DAYS:-}
# 通知渠道
- FEISHU_WEBHOOK_URL=${FEISHU_WEBHOOK_URL:-}
- TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-}
@@ -54,6 +53,21 @@ services:
- BARK_URL=${BARK_URL:-}
# Slack配置
- SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL:-}
# 存储配置
- STORAGE_BACKEND=${STORAGE_BACKEND:-auto}
- LOCAL_RETENTION_DAYS=${LOCAL_RETENTION_DAYS:-0}
- REMOTE_RETENTION_DAYS=${REMOTE_RETENTION_DAYS:-0}
- STORAGE_TXT_ENABLED=${STORAGE_TXT_ENABLED:-true}
- STORAGE_HTML_ENABLED=${STORAGE_HTML_ENABLED:-true}
# 远程存储配置(S3 兼容协议)
- S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-}
- S3_BUCKET_NAME=${S3_BUCKET_NAME:-}
- S3_ACCESS_KEY_ID=${S3_ACCESS_KEY_ID:-}
- S3_SECRET_ACCESS_KEY=${S3_SECRET_ACCESS_KEY:-}
- S3_REGION=${S3_REGION:-}
# 数据拉取配置
- PULL_ENABLED=${PULL_ENABLED:-false}
- PULL_DAYS=${PULL_DAYS:-7}
# 运行模式
- CRON_SCHEDULE=${CRON_SCHEDULE:-*/5 * * * *}
- RUN_MODE=${RUN_MODE:-cron}
@@ -71,7 +85,7 @@ services:
volumes:
- ../config:/app/config:ro
- ../output:/app/output:ro
- ../output:/app/output
environment:
- TZ=Asia/Shanghai
+16 -2
View File
@@ -30,7 +30,6 @@ services:
- PUSH_WINDOW_START=${PUSH_WINDOW_START:-}
- PUSH_WINDOW_END=${PUSH_WINDOW_END:-}
- PUSH_WINDOW_ONCE_PER_DAY=${PUSH_WINDOW_ONCE_PER_DAY:-}
- PUSH_WINDOW_RETENTION_DAYS=${PUSH_WINDOW_RETENTION_DAYS:-}
# 通知渠道
- FEISHU_WEBHOOK_URL=${FEISHU_WEBHOOK_URL:-}
- TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-}
@@ -52,6 +51,21 @@ services:
- BARK_URL=${BARK_URL:-}
# Slack配置
- SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL:-}
# 存储配置
- STORAGE_BACKEND=${STORAGE_BACKEND:-auto}
- LOCAL_RETENTION_DAYS=${LOCAL_RETENTION_DAYS:-0}
- REMOTE_RETENTION_DAYS=${REMOTE_RETENTION_DAYS:-0}
- STORAGE_TXT_ENABLED=${STORAGE_TXT_ENABLED:-true}
- STORAGE_HTML_ENABLED=${STORAGE_HTML_ENABLED:-true}
# 远程存储配置(S3 兼容协议)
- S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-}
- S3_BUCKET_NAME=${S3_BUCKET_NAME:-}
- S3_ACCESS_KEY_ID=${S3_ACCESS_KEY_ID:-}
- S3_SECRET_ACCESS_KEY=${S3_SECRET_ACCESS_KEY:-}
- S3_REGION=${S3_REGION:-}
# 数据拉取配置
- PULL_ENABLED=${PULL_ENABLED:-false}
- PULL_DAYS=${PULL_DAYS:-7}
# 运行模式
- CRON_SCHEDULE=${CRON_SCHEDULE:-*/5 * * * *}
- RUN_MODE=${RUN_MODE:-cron}
@@ -67,7 +81,7 @@ services:
volumes:
- ../config:/app/config:ro
- ../output:/app/output:ro
- ../output:/app/output
environment:
- TZ=Asia/Shanghai
+3 -3
View File
@@ -13,11 +13,11 @@ env >> /etc/environment
case "${RUN_MODE:-cron}" in
"once")
echo "🔄 单次执行"
exec /usr/local/bin/python main.py
exec /usr/local/bin/python -m trendradar
;;
"cron")
# 生成 crontab
echo "${CRON_SCHEDULE:-*/30 * * * *} cd /app && /usr/local/bin/python main.py" > /tmp/crontab
echo "${CRON_SCHEDULE:-*/30 * * * *} cd /app && /usr/local/bin/python -m trendradar" > /tmp/crontab
echo "📅 生成的crontab内容:"
cat /tmp/crontab
@@ -30,7 +30,7 @@ case "${RUN_MODE:-cron}" in
# 立即执行一次(如果配置了)
if [ "${IMMEDIATE_RUN:-false}" = "true" ]; then
echo "▶️ 立即执行一次"
/usr/local/bin/python main.py
/usr/local/bin/python -m trendradar
fi
# 启动 Web 服务器(如果配置了)
+25 -2
View File
@@ -33,7 +33,7 @@ def manual_run():
print("🔄 手动执行爬虫...")
try:
result = subprocess.run(
["python", "main.py"], cwd="/app", capture_output=False, text=True
["python", "-m", "trendradar"], cwd="/app", capture_output=False, text=True
)
if result.returncode == 0:
print("✅ 执行完成")
@@ -285,12 +285,24 @@ def show_config():
"TELEGRAM_CHAT_ID",
"CONFIG_PATH",
"FREQUENCY_WORDS_PATH",
# 存储配置
"STORAGE_BACKEND",
"LOCAL_RETENTION_DAYS",
"REMOTE_RETENTION_DAYS",
"STORAGE_TXT_ENABLED",
"STORAGE_HTML_ENABLED",
"S3_BUCKET_NAME",
"S3_ACCESS_KEY_ID",
"S3_ENDPOINT_URL",
"S3_REGION",
"PULL_ENABLED",
"PULL_DAYS",
]
for var in env_vars:
value = os.environ.get(var, "未设置")
# 隐藏敏感信息
if any(sensitive in var for sensitive in ["WEBHOOK", "TOKEN", "KEY"]):
if any(sensitive in var for sensitive in ["WEBHOOK", "TOKEN", "KEY", "SECRET"]):
if value and value != "未设置":
masked_value = value[:10] + "***" if len(value) > 10 else "***"
print(f" {var}: {masked_value}")
@@ -331,6 +343,17 @@ def show_files():
# 显示最近2天的文件
for date_dir in date_dirs[:2]:
print(f" 📅 {date_dir.name}:")
# 检查 SQLite 数据库文件
db_files = list(date_dir.glob("*.db"))
if db_files:
print(f" 💾 SQLite: {len(db_files)} 个数据库")
for db_file in db_files[:3]:
mtime = time.ctime(db_file.stat().st_mtime)
size_kb = db_file.stat().st_size // 1024
print(f" 📀 {db_file.name} ({size_kb}KB, {mtime.split()[3][:5]})")
# 检查子目录(html, txt)
for subdir in ["html", "txt"]:
sub_path = date_dir / subdir
if sub_path.exists():