� backup: 2026-03-24 04:00

This commit is contained in:
huan
2026-03-24 04:00:48 +08:00
parent 7e143d3ebc
commit 31786dee08
193 changed files with 73520 additions and 1915 deletions
+85
View File
@@ -0,0 +1,85 @@
---
name: vector-memory
description: |
向量语义记忆系统 - 为 OpenClaw 添加语义搜索能力。当用户需要:
(1) 部署向量记忆系统
(2) 开启语义搜索功能
(3) 安装配置 Chroma + BGE-M3
(4) 搜索记忆时找不到内容
(5) 需要比关键词搜索更智能的记忆检索
---
# Vector Memory Skill
## 功能概述
为 OpenClaw 添加**向量语义搜索**能力,解决纯 Markdown 记忆的搜索痛点:
- 搜"股票"能找到"A股监控"、"铜陵有色"
- 支持同义词、近义词理解
- 记忆无限扩展,不受上下文窗口限制
## 技术架构
| 组件 | 选型 | 说明 |
|------|------|------|
| 向量模型 | BGE-M3 (硅基流动) | 中文优化好,向量免费 |
| 向量数据库 | Chroma | 轻量,Python 原生 |
| 持久化 | SQLite | 并发安全 |
## 快速部署
### 1. 安装依赖
```bash
mkdir -p ~/openclaw-memory-vector
cd ~/openclaw-memory-vector
pip install chromadb openai sqlalchemy
```
### 2. 配置 API Key
```bash
export SILICONFLOW_API_KEY="your_api_key"
```
### 3. 初始化系统
```python
import os
import sys
sys.path.insert(0, os.path.expanduser('~/openclaw-memory-vector/scripts'))
from vector_memory import VectorMemorySystem
vm = VectorMemorySystem(
persist_dir="./data/memory",
api_key="your_api_key"
)
```
## 核心脚本
### scripts/vector_memory.py
向量存储引擎,包含 `add_memory()`、`search()` 等方法。详见 [references/core.md](references/core.md)。
### scripts/memory_tier_manager.py
记忆分层管理,自动将记忆分为 core/hot/cold 三层。
### scripts/openclaw_integration.py
OpenClaw 集成接口,提供 `get_memory_system()` 单例模式。
## 数据备份
备份 `~/openclaw-memory-vector/data/memory/` 整个目录:
- `memory.db` - SQLite 数据库(原始文本)
- `chroma/` - Chroma 向量索引
## 成本
- BGE-M3 向量:**免费无限**
- 硅基流动大模型:2000万 Tokens/月
- **总成本:≈ ¥0**
## 触发词
- "部署向量记忆"
- "开启语义搜索"
- "向量备份"
@@ -0,0 +1,71 @@
# 向量记忆系统 - 核心模块详解
## VectorMemorySystem 核心方法
### add_memory(content, metadata, importance)
同时写入向量库 + SQLite
```python
vm.add_memory(
content="用户喜欢喝不加糖的咖啡",
metadata={"category": "preference", "tags": ["咖啡", "口味"]},
importance=4 # >=4 核心记忆
)
```
### search(query, top_k)
语义搜索,返回相似记忆
```python
results = vm.search("股票预警")
# 返回: [{id, content, distance, metadata}, ...]
```
### hybrid_search(query, keyword, top_k)
混合搜索:语义 + 关键词过滤
```python
results = vm.hybrid_search("铜陵", keyword="有色")
```
## MemoryTierManager 分层规则
| 重要性 | 层级 | 说明 |
|--------|------|------|
| >= 4 | core | 永久记忆,不删除 |
| 2-3 | hot | 常用记忆,30天后可归档 |
| < 2 | cold | 冷记忆,自动归档 |
## 数据存储位置
```
~/openclaw-memory-vector/data/memory/
├── memory.db # SQLite(所有记忆的原始文本)
└── chroma/ # Chroma(向量索引)
├── *.bin # 向量数据
└── *.sqlite # Chroma 元数据
```
## 备份与恢复
### 备份
```bash
tar -czvf openclaw-memory-vector.tar.gz -C ~ openclaw-memory-vector/data/memory/
```
### 恢复
```bash
tar -xzvf openclaw-memory-vector.tar.gz -C ~/
```
## 环境变量
| 变量 | 说明 |
|------|------|
| SILICONFLOW_API_KEY | 硅基流动 API Key |
## 成本估算
- BGE-M3 向量模型:**免费无限**
- 硅基流动大模型:2000万 Tokens/月
- **总成本:≈ ¥0**
@@ -0,0 +1,99 @@
# memory_tier_manager.py - 记忆分层管理器
# 自动将记忆分为 core/hot/cold 三层
import sqlite3
from datetime import datetime, timedelta
from vector_memory import VectorMemorySystem
class MemoryTierManager:
    """Assign stored memories to a core/hot/cold tier based on importance.

    The manager wraps a ``VectorMemorySystem`` and reuses its SQLite
    connection (``vector_memory.conn``) to read and annotate rows of the
    ``memories`` table.
    """

    # Column order shared by the SELECT statements that return full rows.
    _ROW_FIELDS = ("id", "content", "metadata", "importance", "created_at")

    def __init__(self, vector_memory: "VectorMemorySystem"):
        """Bind to *vector_memory* and make sure the ``tier`` column exists."""
        self.vm = vector_memory
        self.conn = vector_memory.conn
        self._ensure_tier_column()

    def _ensure_tier_column(self):
        """Add the ``tier`` column to ``memories`` when it is missing.

        ``add_with_tier`` writes to this column; without it the UPDATE
        fails with ``sqlite3.OperationalError: no such column: tier``.
        """
        columns = [
            row[1] for row in self.conn.execute("PRAGMA table_info(memories)")
        ]
        # An empty list means the table itself is absent; leave that alone.
        if columns and "tier" not in columns:
            self.conn.execute("ALTER TABLE memories ADD COLUMN tier TEXT")
            self.conn.commit()

    @staticmethod
    def _tier_for(importance: int) -> str:
        """Map an importance score to its tier name."""
        if importance >= 4:
            return "core"
        if importance >= 2:
            return "hot"
        return "cold"

    def _rows_to_dicts(self, cursor) -> list:
        """Convert full-row query results into dictionaries."""
        return [dict(zip(self._ROW_FIELDS, row)) for row in cursor.fetchall()]

    def add_with_tier(self, content: str, importance: int = 3,
                      tags: list = None, auto_archive: bool = True):
        """Store a memory and record the tier derived from *importance*.

        Args:
            content: Text of the memory.
            importance: Score used for tiering (>= 4 core, 2-3 hot, else cold).
            tags: Optional list of tags stored in the metadata.
            auto_archive: Flag stored in the metadata as ``auto_archive``.

        Returns:
            The id returned by ``self.vm.add_memory``.
        """
        metadata = {
            'tags': tags or [],
            'importance': importance,
            'auto_archive': auto_archive,
        }
        memory_id = self.vm.add_memory(
            content=content,
            metadata=metadata,
            importance=importance,
        )

        self.conn.execute(
            "UPDATE memories SET tier=? WHERE id=?",
            (self._tier_for(importance), memory_id),
        )
        self.conn.commit()
        return memory_id

    def get_recent_memories(self, days: int = 7, limit: int = 20):
        """Return up to *limit* memories, newest first.

        NOTE(review): *days* is accepted for interface compatibility but is
        not applied as a filter; callers such as ``get_all_memories`` rely
        on the default without expecting a time window. Confirm the
        intended semantics before adding a cutoff.
        """
        cursor = self.conn.execute("""
            SELECT id, content, metadata, importance, created_at
            FROM memories
            ORDER BY created_at DESC
            LIMIT ?
        """, (limit,))
        return self._rows_to_dicts(cursor)

    def get_core_memories(self):
        """Return every memory with importance >= 4, newest first."""
        cursor = self.conn.execute("""
            SELECT id, content, metadata, importance, created_at
            FROM memories
            WHERE importance >= 4
            ORDER BY created_at DESC
        """)
        return self._rows_to_dicts(cursor)

    def migrate_old_memories(self, hot_days: int = 30):
        """Count memories that are eligible for archiving.

        A row is eligible when its importance is below 3 and it was created
        more than *hot_days* days ago. No data is moved or deleted yet.

        Returns:
            The number of eligible rows.
        """
        # ``created_at`` defaults to CURRENT_TIMESTAMP, which SQLite stores in
        # UTC. Computing the cutoff inside SQLite keeps both sides of the
        # comparison in the same time zone and text format.
        cursor = self.conn.execute("""
            SELECT id, content, metadata
            FROM memories
            WHERE importance < 3
            AND created_at < datetime('now', ?)
        """, (f"{-int(hot_days):+d} days",))
        candidates = cursor.fetchall()
        # Archiving logic (write to file, compress, ...) can be added here.
        return len(candidates)
@@ -0,0 +1,77 @@
# openclaw_integration.py - OpenClaw 集成接口
# 提供单例模式的记忆系统访问
from vector_memory import VectorMemorySystem
from memory_tier_manager import MemoryTierManager
import os
# Module-level singletons; both start as None and are filled in lazily.
_memory_system = None
_tier_manager = None
def get_memory_system():
    """Return the shared VectorMemorySystem, building it on first call.

    The API key is read from the ``SILICONFLOW_API_KEY`` environment
    variable and the store is opened at ``./data/memory``.

    Raises:
        ValueError: If the environment variable is unset or empty.
    """
    global _memory_system
    if _memory_system is not None:
        return _memory_system

    key = os.getenv("SILICONFLOW_API_KEY")
    if not key:
        raise ValueError("请设置 SILICONFLOW_API_KEY 环境变量")

    _memory_system = VectorMemorySystem(
        persist_dir="./data/memory",
        api_key=key,
    )
    return _memory_system
def get_tier_manager():
    """Return the shared MemoryTierManager, building it on first call.

    The manager is constructed around the object returned by
    ``get_memory_system()``.
    """
    global _tier_manager
    if _tier_manager is not None:
        return _tier_manager

    _tier_manager = MemoryTierManager(get_memory_system())
    return _tier_manager
def search_memory(query: str, top_k: int = 5):
    """Run a semantic search through the shared memory system.

    Entry point intended for OpenClaw; forwards *query* and *top_k* to
    ``VectorMemorySystem.search`` and returns its result unchanged.
    """
    return get_memory_system().search(query, top_k)
def add_memory(content: str, importance: int = 3, tags: list = None):
    """Store a memory through the shared tier manager.

    Entry point intended for OpenClaw; forwards the arguments to
    ``MemoryTierManager.add_with_tier`` and returns its result.
    """
    return get_tier_manager().add_with_tier(content, importance, tags)
def get_all_memories(limit: int = 50):
    """Return up to *limit* stored memories via the shared tier manager.

    Delegates to ``MemoryTierManager.get_recent_memories`` with only the
    ``limit`` argument supplied.
    """
    return get_tier_manager().get_recent_memories(limit=limit)
def get_core_memories():
    """Return the core memories reported by the shared tier manager."""
    return get_tier_manager().get_core_memories()
# Usage example
if __name__ == "__main__":
    # Store one sample memory with core-level importance.
    sample_tags = ["向量记忆", "系统部署", "硅基流动"]
    add_memory(
        content="2026-03-21: 部署了向量记忆系统,采用硅基流动 BGE-M3 + Chroma + SQLite 架构",
        importance=4,
        tags=sample_tags,
    )

    # Query it back and print each hit with 1 - distance as a score.
    for r in search_memory("记忆系统"):
        print(f"- {r['content'][:50]}... (相似度: {1-r['distance']:.2%})")
@@ -0,0 +1,148 @@
# vector_memory.py - 向量存储引擎
# BGE-M3 + Chroma + SQLite 架构
import chromadb
from chromadb.config import Settings
from openai import OpenAI
import sqlite3
import json
from datetime import datetime
class VectorMemorySystem:
    """Semantic memory store backed by BGE-M3 embeddings, Chroma and SQLite.

    Every memory is written twice: its embedding goes to a Chroma
    collection for similarity search, and its raw text plus metadata go to
    a SQLite table named ``memories``.
    """

    def __init__(self, persist_dir="./data", api_key: str = None):
        """Open (or create) the stores under *persist_dir*.

        Args:
            persist_dir: Directory holding ``memory.db`` and the ``chroma``
                sub-directory. ``~`` is expanded and the directory is
                created when missing.
            api_key: SiliconFlow API key used for embedding requests.
        """
        # Local import, matching the file's existing style for uuid.
        import os

        persist_dir = os.path.expanduser(persist_dir)
        # sqlite3.connect cannot create missing parent directories.
        os.makedirs(persist_dir, exist_ok=True)

        # 1. SiliconFlow client (OpenAI-compatible endpoint).
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://api.siliconflow.cn/v1"
        )

        # 2. Chroma vector store. PersistentClient writes to disk; a plain
        #    Client(Settings(persist_directory=...)) is not guaranteed to.
        self.chroma = chromadb.PersistentClient(
            path=os.path.join(persist_dir, "chroma"),
            settings=Settings(anonymized_telemetry=False),
        )
        self.collection = self.chroma.get_or_create_collection(
            name="openclaw_memory",
            metadata={"description": "OpenClaw long-term memory"}
        )

        # 3. SQLite database holding the original text.
        self.db_path = f"{persist_dir}/memory.db"
        self._init_sqlite()

    def _init_sqlite(self):
        """Connect to ``self.db_path`` and create the table and indexes."""
        self.conn = sqlite3.connect(self.db_path)
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS memories (
                id TEXT PRIMARY KEY,
                content TEXT NOT NULL,
                metadata TEXT,
                importance INTEGER DEFAULT 3,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        self.conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_importance ON memories(importance)
        """)
        self.conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_created_at ON memories(created_at)
        """)
        self.conn.commit()

    def _get_embedding(self, text: str) -> list:
        """Request the BGE-M3 embedding of *text* and return the vector."""
        response = self.client.embeddings.create(
            model="BAAI/bge-m3",
            input=text
        )
        return response.data[0].embedding

    @staticmethod
    def _chroma_metadata(metadata):
        """Return a Chroma-safe copy of *metadata*, or None when empty.

        Scalar values (str, int, float, bool) are kept, ``None`` values are
        dropped, and anything else (for example a list of tags) is stored
        as a JSON string so the vector store accepts it.
        """
        flat = {}
        for key, value in (metadata or {}).items():
            if value is None:
                continue
            if isinstance(value, (str, int, float, bool)):
                flat[key] = value
            else:
                flat[key] = json.dumps(value, ensure_ascii=False)
        return flat or None

    def add_memory(self, content: str, metadata: dict = None, importance: int = 3):
        """Store a memory in both the vector store and SQLite.

        Args:
            content: Text to remember.
            metadata: Optional dictionary; stored as JSON in SQLite and in
                flattened form in Chroma.
            importance: Integer score stored in the ``importance`` column.

        Returns:
            The generated UUID string identifying the memory.
        """
        import uuid
        memory_id = str(uuid.uuid4())

        # Embed first so a failed API call leaves both stores untouched.
        embedding = self._get_embedding(content)
        chroma_meta = self._chroma_metadata(metadata)

        try:
            self.conn.execute(
                """INSERT INTO memories (id, content, metadata, importance)
                VALUES (?, ?, ?, ?)""",
                (memory_id, content, json.dumps(metadata), importance)
            )
            self.collection.add(
                ids=[memory_id],
                embeddings=[embedding],
                documents=[content],
                metadatas=[chroma_meta] if chroma_meta else None
            )
        except Exception:
            # Undo the pending INSERT so the two stores stay in step.
            self.conn.rollback()
            raise
        self.conn.commit()
        return memory_id

    def search(self, query: str, top_k: int = 5) -> list:
        """Return up to *top_k* memories closest to *query*.

        Each result is a dict with ``id``, ``content``, ``distance`` and
        ``metadata`` keys. A non-positive *top_k* yields an empty list.
        """
        if top_k <= 0:
            return []

        query_embedding = self._get_embedding(query)
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k
        )

        # Results are nested one level per query; only one query was sent.
        hits = zip(
            results['ids'][0],
            results['documents'][0],
            results['distances'][0],
            results['metadatas'][0],
        )
        return [
            {
                'id': hit_id,
                'content': doc,
                'distance': distance,
                'metadata': meta,
            }
            for hit_id, doc, distance, meta in hits
        ]

    def hybrid_search(self, query: str, keyword: str = None, top_k: int = 5):
        """Semantic search optionally narrowed by a substring match.

        Twice *top_k* candidates are fetched so that filtering by *keyword*
        still has a chance of filling *top_k* results.
        """
        vector_results = self.search(query, top_k * 2)

        if keyword:
            vector_results = [
                r for r in vector_results
                if keyword in r['content']
            ]

        return vector_results[:top_k]
if __name__ == "__main__":
    # Smoke test: needs SILICONFLOW_API_KEY and network access.
    import os

    api_key = os.getenv("SILICONFLOW_API_KEY")
    if not api_key:
        print("请设置 SILICONFLOW_API_KEY 环境变量")
        # exit() is injected by the site module and may be absent
        # (e.g. python -S); SystemExit always works.
        raise SystemExit(1)

    vm = VectorMemorySystem(persist_dir="./data/memory", api_key=api_key)

    # Add one memory.
    memory_id = vm.add_memory(
        content="2026-03-21: 部署了向量记忆系统",
        metadata={"tags": ["系统部署"]},
        importance=4
    )
    print(f"添加记忆成功: {memory_id}")

    # Search for it and print each hit with 1 - distance as a score.
    # NOTE(review): 1 - distance is only a true similarity when the
    # collection uses cosine distance; confirm the collection's metric.
    results = vm.search("记忆系统")
    for r in results:
        print(f"- {r['content'][:50]}... (相似度: {1-r['distance']:.2%})")