-- Mirror of https://gitee.com/houhuan/TrendRadar.git
-- Synced 2025-12-21 11:27:17 +08:00 (118 lines, 4.0 KiB, SQL)
--
-- TrendRadar database schema

-- ============================================
-- Platform information table
-- Key invariant: id never changes; name may change
-- ============================================
CREATE TABLE IF NOT EXISTS platforms (
    id         TEXT PRIMARY KEY,                     -- stable platform identifier
    name       TEXT NOT NULL,                        -- display name (mutable)
    is_active  INTEGER DEFAULT 1,                    -- defaults to 1
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP   -- set automatically on insert
);

-- ============================================
-- News items table
-- A row is uniquely identified by URL + platform_id, which enables
-- de-duplicated storage
-- ============================================
CREATE TABLE IF NOT EXISTS news_items (
    id               INTEGER PRIMARY KEY AUTOINCREMENT,
    title            TEXT NOT NULL,
    platform_id      TEXT NOT NULL,
    rank             INTEGER NOT NULL,
    url              TEXT DEFAULT '',
    mobile_url       TEXT DEFAULT '',
    first_crawl_time TEXT NOT NULL,        -- time the item was first crawled
    last_crawl_time  TEXT NOT NULL,        -- time the item was most recently crawled
    crawl_count      INTEGER DEFAULT 1,    -- number of times the item has been crawled
    created_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (platform_id) REFERENCES platforms(id)
);

-- ============================================
-- Title change history table
-- Records how the title changes for the same URL
-- ============================================
CREATE TABLE IF NOT EXISTS title_changes (
    id           INTEGER PRIMARY KEY AUTOINCREMENT,
    news_item_id INTEGER NOT NULL,                      -- references news_items(id)
    old_title    TEXT NOT NULL,
    new_title    TEXT NOT NULL,
    changed_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (news_item_id) REFERENCES news_items(id)
);

-- ============================================
-- Rank history table
-- Records the rank observed at each crawl
-- ============================================
CREATE TABLE IF NOT EXISTS rank_history (
    id           INTEGER PRIMARY KEY AUTOINCREMENT,
    news_item_id INTEGER NOT NULL,                      -- references news_items(id)
    rank         INTEGER NOT NULL,
    crawl_time   TEXT NOT NULL,
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (news_item_id) REFERENCES news_items(id)
);

-- ============================================
-- Crawl records table
-- Records the time and item count of each crawl
-- ============================================
CREATE TABLE IF NOT EXISTS crawl_records (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    crawl_time  TEXT NOT NULL UNIQUE,                  -- at most one record per crawl time
    total_items INTEGER DEFAULT 0,
    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- ============================================
-- Crawl source status table
-- Records the success/failure status of each platform for each crawl
-- ============================================
CREATE TABLE IF NOT EXISTS crawl_source_status (
    crawl_record_id INTEGER NOT NULL,                  -- references crawl_records(id)
    platform_id     TEXT NOT NULL,                     -- references platforms(id)
    status          TEXT NOT NULL CHECK(status IN ('success', 'failed')),
    -- One status row per (crawl, platform) pair.
    PRIMARY KEY (crawl_record_id, platform_id),
    FOREIGN KEY (crawl_record_id) REFERENCES crawl_records(id),
    FOREIGN KEY (platform_id) REFERENCES platforms(id)
);

-- ============================================
-- Push records table
-- Used by the push_window once_per_day feature
-- ============================================
CREATE TABLE IF NOT EXISTS push_records (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    date        TEXT NOT NULL UNIQUE,                  -- at most one record per date
    pushed      INTEGER DEFAULT 0,                     -- defaults to 0
    push_time   TEXT,
    report_type TEXT,
    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- ============================================
-- Index definitions
-- ============================================

-- Platform index
CREATE INDEX IF NOT EXISTS idx_news_platform ON news_items(platform_id);

-- Time index (used to query the latest data)
CREATE INDEX IF NOT EXISTS idx_news_crawl_time ON news_items(last_crawl_time);

-- Title index (used for title search)
-- NOTE(review): a B-tree index helps equality/prefix lookups only; a
-- substring search such as LIKE '%keyword%' cannot use it — confirm how
-- title search is actually queried.
CREATE INDEX IF NOT EXISTS idx_news_title ON news_items(title);

-- Unique index on URL + platform_id (non-empty URLs only) to enforce
-- de-duplication; rows with an empty URL are excluded from the index and
-- may therefore repeat.
CREATE UNIQUE INDEX IF NOT EXISTS idx_news_url_platform
    ON news_items(url, platform_id) WHERE url != '';

-- Crawl status index
-- NOTE(review): crawl_source_status declares
-- PRIMARY KEY (crawl_record_id, platform_id), whose leading column already
-- serves lookups by crawl_record_id; this index looks redundant — confirm
-- before removing.
CREATE INDEX IF NOT EXISTS idx_crawl_status_record ON crawl_source_status(crawl_record_id);

-- Rank history index
CREATE INDEX IF NOT EXISTS idx_rank_history_news ON rank_history(news_item_id);