TrendRadar/trendradar/storage/schema.sql
2025-12-13 13:44:35 +08:00

118 lines
4.0 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- TrendRadar 数据库表结构
-- ============================================
-- platforms: one row per news platform (source).
-- The id is the stable key and does not change; the name may change.
-- ============================================
CREATE TABLE IF NOT EXISTS platforms (
    id          TEXT,
    name        TEXT      NOT NULL,
    is_active   INTEGER   DEFAULT 1,                  -- defaults to 1 when omitted on insert
    updated_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,  -- default is applied at insert time only
    PRIMARY KEY (id)
);
-- ============================================
-- news_items: one row per crawled news entry.
-- URL + platform_id act as the unique identifier, which allows
-- de-duplicated storage. (The uniqueness itself is not declared in
-- this table definition.)
-- ============================================
CREATE TABLE IF NOT EXISTS news_items (
    id                INTEGER   PRIMARY KEY AUTOINCREMENT,
    title             TEXT      NOT NULL,
    platform_id       TEXT      NOT NULL REFERENCES platforms (id),
    rank              INTEGER   NOT NULL,
    url               TEXT      DEFAULT '',
    mobile_url        TEXT      DEFAULT '',
    first_crawl_time  TEXT      NOT NULL,                   -- time of the first crawl
    last_crawl_time   TEXT      NOT NULL,                   -- time of the most recent crawl
    crawl_count       INTEGER   DEFAULT 1,                  -- number of times crawled
    created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- ============================================
-- title_changes: title change history.
-- Records how the title changes for the same URL.
-- ============================================
CREATE TABLE IF NOT EXISTS title_changes (
    id            INTEGER   PRIMARY KEY AUTOINCREMENT,
    news_item_id  INTEGER   NOT NULL REFERENCES news_items (id),
    old_title     TEXT      NOT NULL,
    new_title     TEXT      NOT NULL,
    changed_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- ============================================
-- rank_history: rank history.
-- Records the rank observed at each crawl.
-- ============================================
CREATE TABLE IF NOT EXISTS rank_history (
    id            INTEGER   PRIMARY KEY AUTOINCREMENT,
    news_item_id  INTEGER   NOT NULL REFERENCES news_items (id),
    rank          INTEGER   NOT NULL,
    crawl_time    TEXT      NOT NULL,
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- ============================================
-- crawl_records: crawl log.
-- Records the time and item count of each crawl.
-- ============================================
CREATE TABLE IF NOT EXISTS crawl_records (
    id           INTEGER   PRIMARY KEY AUTOINCREMENT,
    crawl_time   TEXT      NOT NULL,
    total_items  INTEGER   DEFAULT 0,
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (crawl_time)   -- at most one record per crawl_time value
);
-- ============================================
-- crawl_source_status: per-source crawl status.
-- Records the success/failure status of each platform for each crawl.
-- ============================================
CREATE TABLE IF NOT EXISTS crawl_source_status (
    crawl_record_id  INTEGER NOT NULL REFERENCES crawl_records (id),
    platform_id      TEXT    NOT NULL REFERENCES platforms (id),
    status           TEXT    NOT NULL CHECK (status IN ('success', 'failed')),
    -- one status row per (crawl, platform) pair
    PRIMARY KEY (crawl_record_id, platform_id)
);
-- ============================================
-- push_records: push log.
-- Used by the push_window once_per_day feature.
-- ============================================
CREATE TABLE IF NOT EXISTS push_records (
    id           INTEGER   PRIMARY KEY AUTOINCREMENT,
    date         TEXT      NOT NULL,
    pushed       INTEGER   DEFAULT 0,
    push_time    TEXT,
    report_type  TEXT,
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (date)   -- at most one record per date value
);
-- ============================================
-- Index definitions
-- ============================================
-- Platform index: news_items by platform_id.
CREATE INDEX IF NOT EXISTS idx_news_platform
    ON news_items (platform_id);
-- Time index (used when querying the latest data).
CREATE INDEX IF NOT EXISTS idx_news_crawl_time
    ON news_items (last_crawl_time);
-- Title index (used for title search).
-- NOTE(review): an ordinary index on title is expected to help equality or
-- prefix lookups only; confirm it matches how the title search is queried.
CREATE INDEX IF NOT EXISTS idx_news_title
    ON news_items (title);
-- Unique index on URL + platform_id; it covers only rows with a
-- non-empty URL and is what implements de-duplication.
CREATE UNIQUE INDEX IF NOT EXISTS idx_news_url_platform
    ON news_items (url, platform_id)
    WHERE url != '';
-- Crawl status index: crawl_source_status by crawl_record_id.
-- NOTE(review): crawl_source_status declares PRIMARY KEY (crawl_record_id,
-- platform_id), whose leading column is the same; this index may be
-- redundant with it. Confirm before relying on or removing it.
CREATE INDEX IF NOT EXISTS idx_crawl_status_record
    ON crawl_source_status (crawl_record_id);
-- Rank history index: rank_history by news_item_id.
CREATE INDEX IF NOT EXISTS idx_rank_history_news
    ON rank_history (news_item_id);