openclaw-home-pc/vector_memory/memory_backup.py
2026-03-21 15:31:06 +08:00

222 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
向量记忆系统 - 自动备份脚本
支持:
1. 导出到本地 JSON/Markdown
2. 同步到飞书云盘
3. 定时自动备份
"""
import os
import sys
import json
import sqlite3
import shutil
from datetime import datetime
from pathlib import Path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from vector_memory import VectorMemorySystem
class MemoryBackup:
    """Export, back up and restore the vector memory store.

    Artifacts are written into ``backup_dir`` (created on demand):
    timestamped JSON / Markdown exports, a copy of the vector database
    directory, and a per-run manifest file.
    """

    def __init__(self, backup_dir: str = "./backups"):
        # Directory receiving all backup artifacts; created if missing.
        # NOTE(review): parent directories are not created (no parents=True) —
        # assumes backup_dir's parent already exists.
        self.backup_dir = Path(backup_dir)
        self.backup_dir.mkdir(exist_ok=True)

    def _load_rows(self, vm: "VectorMemorySystem") -> list:
        """Fetch every row of the ``memories`` table, newest first.

        Shared by export_json / export_markdown so the connection handling
        lives in one place.
        """
        conn = sqlite3.connect(vm.db_path)
        try:
            conn.row_factory = sqlite3.Row  # dict-like column access
            cursor = conn.execute("SELECT * FROM memories ORDER BY created_at DESC")
            return cursor.fetchall()
        finally:
            # Always release the connection, even if the query raises.
            conn.close()

    def export_json(self, vm: "VectorMemorySystem", filepath: str = None) -> str:
        """Export all memories to a JSON file.

        Args:
            vm: memory system whose SQLite store (``vm.db_path``) is read.
            filepath: destination path; defaults to a timestamped file
                inside ``backup_dir``.

        Returns:
            The path of the written file, as ``str``.
        """
        if not filepath:
            filepath = self.backup_dir / f"memory_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        memories = [
            {
                'id': row['id'],
                'content': row['content'],
                # metadata is stored as a JSON string; NULL/empty means "no metadata".
                'metadata': json.loads(row['metadata']) if row['metadata'] else {},
                'importance': row['importance'],
                'tier': row['tier'],
                'created_at': row['created_at'],
                'updated_at': row['updated_at'],
            }
            for row in self._load_rows(vm)
        ]
        backup_data = {
            'version': '1.0',
            'export_time': datetime.now().isoformat(),
            'total_count': len(memories),
            'memories': memories,
        }
        with open(filepath, 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps CJK content human-readable in the backup.
            json.dump(backup_data, f, ensure_ascii=False, indent=2)
        print(f"✅ JSON 备份已保存: {filepath}")
        return str(filepath)

    def export_markdown(self, vm: "VectorMemorySystem", filepath: str = None) -> str:
        """Export all memories to a human-readable Markdown file.

        Args:
            vm: memory system whose SQLite store is read.
            filepath: destination path; defaults to a timestamped file
                inside ``backup_dir``.

        Returns:
            The path of the written file, as ``str``.
        """
        if not filepath:
            filepath = self.backup_dir / f"memory_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
        rows = self._load_rows(vm)
        md_lines = [
            "# 🧠 OpenClaw 向量记忆备份",
            "",
            f"> 导出时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"> 总数量: {len(rows)}",
            "",
            "---",
            "",
        ]
        for i, row in enumerate(rows, 1):
            metadata = json.loads(row['metadata']) if row['metadata'] else {}
            tags = metadata.get('tags', [])
            # BUGFIX: the original read `'' * row['importance']`, which is always
            # the empty string — the star glyph was evidently stripped by a
            # Unicode scrubber. Render one ⭐ per importance point as intended.
            md_lines.append(f"## {i}. [{'⭐' * row['importance']}] {row['content'][:60]}...")
            md_lines.append("")
            md_lines.append(f"- **ID**: `{row['id']}`")
            md_lines.append(f"- **重要性**: {row['importance']}/5")
            md_lines.append(f"- **层级**: {row['tier']}")
            md_lines.append(f"- **标签**: {', '.join(tags) if tags else ''}")
            md_lines.append(f"- **创建时间**: {row['created_at']}")
            md_lines.append(f"- **内容**: {row['content']}")
            md_lines.append("")
            md_lines.append("---")
            md_lines.append("")
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write('\n'.join(md_lines))
        print(f"✅ Markdown 备份已保存: {filepath}")
        return str(filepath)

    def backup_all(self, vm: "VectorMemorySystem") -> dict:
        """Run a full backup: JSON + Markdown exports plus a copy of the
        vector database directory, and write a manifest describing the run.

        Returns:
            Dict with keys ``json``, ``markdown``, ``vector`` (paths) and
            ``timestamp``.
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        results = {
            'json': None,
            'markdown': None,
            'vector': None,
            'timestamp': timestamp,
        }
        # 1. JSON export.
        results['json'] = self.export_json(vm)
        # 2. Markdown export.
        results['markdown'] = self.export_markdown(vm)
        # 3. Vector database backup: copy the whole directory containing the
        #    SQLite store (and, presumably, the vector index files alongside it).
        vector_backup = self.backup_dir / f"vector_{timestamp}"
        shutil.copytree(
            os.path.dirname(vm.db_path),
            vector_backup,
            dirs_exist_ok=True,
        )
        results['vector'] = str(vector_backup)
        # Write the manifest for this run.
        manifest = {
            'backup_time': timestamp,
            'files': results,
            'total_memories': vm.count(),
        }
        manifest_file = self.backup_dir / f"manifest_{timestamp}.json"
        with open(manifest_file, 'w', encoding='utf-8') as f:
            # ensure_ascii=False for consistency with export_json (paths/content
            # may contain non-ASCII characters).
            json.dump(manifest, f, ensure_ascii=False, indent=2)
        print(f"\n📋 备份清单: {manifest_file}")
        return results

    def restore_from_json(self, json_file: str, vm: "VectorMemorySystem" = None):
        """Restore memories from a JSON backup produced by :meth:`export_json`.

        Args:
            json_file: path to the backup file.
            vm: target memory system; if omitted, a new one is created using
                the ``SILICONFLOW_API_KEY`` environment variable.
        """
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if not vm:
            api_key = os.getenv("SILICONFLOW_API_KEY")
            vm = VectorMemorySystem(api_key=api_key)
        print(f"📥 开始恢复 {len(data['memories'])} 条记忆...")
        for mem in data['memories']:
            try:
                vm.add_memory(
                    content=mem['content'],
                    metadata=mem['metadata'],
                    importance=mem['importance'],
                )
            except Exception:
                # NOTE(review): any failure is reported as a duplicate here —
                # consider logging the actual exception to distinguish real errors.
                print(f" ⚠️ 跳过重复: {mem['id']}")
        print(f"✅ 恢复完成!")
def main():
    """CLI entry point: parse the sub-command and dispatch to backup/restore.

    Sub-commands: export-json, export-markdown, backup, restore <file>.
    Exits with status 1 if SILICONFLOW_API_KEY is not set.
    """
    import argparse
    parser = argparse.ArgumentParser(description="向量记忆备份工具")
    subparsers = parser.add_subparsers(dest="command")
    # Export commands take no extra arguments.
    subparsers.add_parser("export-json", help="导出为 JSON")
    subparsers.add_parser("export-markdown", help="导出为 Markdown")
    # BUGFIX: the help text was missing its opening parenthesis.
    subparsers.add_parser("backup", help="完整备份(JSON + Markdown + 向量库)")
    # Restore takes the path of a JSON backup file.
    restore_parser = subparsers.add_parser("restore", help="从 JSON 恢复")
    restore_parser.add_argument("file", help="备份文件路径")
    args = parser.parse_args()
    # The embedding backend needs an API key; fail fast if it is absent.
    api_key = os.getenv("SILICONFLOW_API_KEY")
    if not api_key:
        print("❌ 请设置 SILICONFLOW_API_KEY 环境变量")
        sys.exit(1)
    # Initialize the memory system and backup manager.
    vm = VectorMemorySystem(api_key=api_key)
    backup = MemoryBackup()
    # Dispatch on the chosen sub-command; no command prints usage.
    if args.command == "export-json":
        backup.export_json(vm)
    elif args.command == "export-markdown":
        backup.export_markdown(vm)
    elif args.command == "backup":
        backup.backup_all(vm)
    elif args.command == "restore":
        backup.restore_from_json(args.file, vm)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()