backup: 2026-03-24 04:00
This commit is contained in:
@@ -0,0 +1,85 @@
|
||||
---
|
||||
name: vector-memory
|
||||
description: |
|
||||
向量语义记忆系统 - 为 OpenClaw 添加语义搜索能力。当用户需要:
|
||||
(1) 部署向量记忆系统
|
||||
(2) 开启语义搜索功能
|
||||
(3) 安装配置 Chroma + BGE-M3
|
||||
(4) 搜索记忆时找不到内容
|
||||
(5) 需要比关键词搜索更智能的记忆检索
|
||||
---
|
||||
|
||||
# Vector Memory Skill
|
||||
|
||||
## 功能概述
|
||||
|
||||
为 OpenClaw 添加**向量语义搜索**能力,解决纯 Markdown 记忆的搜索痛点:
|
||||
- 搜"股票"能找到"A股监控"、"铜陵有色"
|
||||
- 支持同义词、近义词理解
|
||||
- 记忆无限扩展,不受上下文窗口限制
|
||||
|
||||
## 技术架构
|
||||
|
||||
| 组件 | 选型 | 说明 |
|
||||
|------|------|------|
|
||||
| 向量模型 | BGE-M3 (硅基流动) | 中文优化好,向量免费 |
|
||||
| 向量数据库 | Chroma | 轻量,Python 原生 |
|
||||
| 持久化 | SQLite | 并发安全 |
|
||||
|
||||
## 快速部署
|
||||
|
||||
### 1. 安装依赖
|
||||
|
||||
```bash
|
||||
mkdir -p ~/openclaw-memory-vector
|
||||
cd ~/openclaw-memory-vector
|
||||
pip install chromadb openai sqlalchemy
|
||||
```
|
||||
|
||||
### 2. 配置 API Key
|
||||
|
||||
```bash
|
||||
export SILICONFLOW_API_KEY="your_api_key"  # 请替换为你自己的 Key;切勿将真实 Key 提交到仓库
|
||||
```
|
||||
|
||||
### 3. 初始化系统
|
||||
|
||||
```python
|
||||
import os
import sys
# Python 不会自动展开 "~",需要显式调用 expanduser
sys.path.insert(0, os.path.expanduser('~/openclaw-memory-vector/scripts'))
|
||||
from vector_memory import VectorMemorySystem
|
||||
|
||||
vm = VectorMemorySystem(
|
||||
persist_dir="./data/memory",
|
||||
api_key="your_api_key"
|
||||
)
|
||||
```
|
||||
|
||||
## 核心脚本
|
||||
|
||||
### scripts/vector_memory.py
|
||||
向量存储引擎,包含 `add_memory()` 和 `search()` 方法。详见 [references/core.md](references/core.md)。
|
||||
|
||||
### scripts/memory_tier_manager.py
|
||||
记忆分层管理,自动将记忆分为 core/hot/cold 三层。
|
||||
|
||||
### scripts/openclaw_integration.py
|
||||
OpenClaw 集成接口,提供 `get_memory_system()` 单例模式。
|
||||
|
||||
## 数据备份
|
||||
|
||||
备份 `~/openclaw-memory-vector/data/memory/` 整个目录:
|
||||
- `memory.db` - SQLite 数据库(原始文本)
|
||||
- `chroma/` - Chroma 向量索引
|
||||
|
||||
## 成本
|
||||
|
||||
- BGE-M3 向量:**免费无限**
|
||||
- 硅基流动大模型:2000万 Tokens/月
|
||||
- **总成本:≈ ¥0**
|
||||
|
||||
## 触发词
|
||||
|
||||
- "部署向量记忆"
|
||||
- "开启语义搜索"
|
||||
- "向量备份"
|
||||
@@ -0,0 +1,71 @@
|
||||
# 向量记忆系统 - 核心模块详解
|
||||
|
||||
## VectorMemorySystem 核心方法
|
||||
|
||||
### add_memory(content, metadata, importance)
|
||||
同时写入向量库 + SQLite
|
||||
|
||||
```python
|
||||
vm.add_memory(
|
||||
content="用户喜欢喝不加糖的咖啡",
|
||||
metadata={"category": "preference", "tags": ["咖啡", "口味"]},
|
||||
importance=4 # >=4 核心记忆
|
||||
)
|
||||
```
|
||||
|
||||
### search(query, top_k)
|
||||
语义搜索,返回相似记忆
|
||||
|
||||
```python
|
||||
results = vm.search("股票预警")
|
||||
# 返回: [{id, content, distance, metadata}, ...]
|
||||
```
|
||||
|
||||
### hybrid_search(query, keyword, top_k)
|
||||
混合搜索:语义 + 关键词过滤
|
||||
|
||||
```python
|
||||
results = vm.hybrid_search("铜陵", keyword="有色")
|
||||
```
|
||||
|
||||
## MemoryTierManager 分层规则
|
||||
|
||||
| 重要性 | 层级 | 说明 |
|
||||
|--------|------|------|
|
||||
| >= 4 | core | 永久记忆,不删除 |
|
||||
| 2-3 | hot | 常用记忆,30天后可归档 |
|
||||
| < 2 | cold | 冷记忆,自动归档 |
|
||||
|
||||
## 数据存储位置
|
||||
|
||||
```
|
||||
~/openclaw-memory-vector/data/memory/
|
||||
├── memory.db # SQLite(所有记忆的原始文本)
|
||||
└── chroma/ # Chroma(向量索引)
|
||||
├── *.bin # 向量数据
|
||||
└── *.sqlite # Chroma 元数据
|
||||
```
|
||||
|
||||
## 备份与恢复
|
||||
|
||||
### 备份
|
||||
```bash
|
||||
tar -czvf openclaw-memory-vector.tar.gz -C ~ openclaw-memory-vector/data/memory/
|
||||
```
|
||||
|
||||
### 恢复
|
||||
```bash
|
||||
tar -xzvf openclaw-memory-vector.tar.gz -C ~/
|
||||
```
|
||||
|
||||
## 环境变量
|
||||
|
||||
| 变量 | 说明 |
|
||||
|------|------|
|
||||
| SILICONFLOW_API_KEY | 硅基流动 API Key |
|
||||
|
||||
## 成本估算
|
||||
|
||||
- BGE-M3 向量模型:**免费无限**
|
||||
- 硅基流动大模型:2000万 Tokens/月
|
||||
- **总成本:≈ ¥0**
|
||||
@@ -0,0 +1,99 @@
|
||||
# memory_tier_manager.py - 记忆分层管理器
|
||||
# 自动将记忆分为 core/hot/cold 三层
|
||||
|
||||
import sqlite3
|
||||
from datetime import datetime, timedelta
|
||||
from vector_memory import VectorMemorySystem
|
||||
|
||||
|
||||
class MemoryTierManager:
    """Assign memories to core/hot/cold tiers on top of a vector memory store.

    The manager reuses the SQLite connection exposed by the wrapped memory
    system (``vector_memory.conn``) and records each memory's tier in a
    ``tier`` column of the ``memories`` table.
    """

    # Importance thresholds used to pick a tier.
    CORE_MIN_IMPORTANCE = 4
    HOT_MIN_IMPORTANCE = 2

    # The annotation is a string so this class can be defined even when
    # VectorMemorySystem is not importable (e.g. in isolated tests).
    def __init__(self, vector_memory: "VectorMemorySystem"):
        """Wrap *vector_memory* and make sure the ``tier`` column exists."""
        self.vm = vector_memory
        self.conn = vector_memory.conn
        self._ensure_tier_column()

    def _ensure_tier_column(self):
        """Add the ``tier`` column when the ``memories`` table lacks it.

        add_with_tier() writes to this column, so without it every call
        would fail with ``sqlite3.OperationalError``.
        """
        columns = {
            row[1] for row in self.conn.execute("PRAGMA table_info(memories)")
        }
        # An empty set means the table does not exist; leave that to the owner.
        if columns and "tier" not in columns:
            self.conn.execute("ALTER TABLE memories ADD COLUMN tier TEXT")
            self.conn.commit()

    @classmethod
    def _tier_for(cls, importance: int) -> str:
        """Map an importance score to ``"core"``, ``"hot"`` or ``"cold"``."""
        if importance >= cls.CORE_MIN_IMPORTANCE:
            return "core"
        if importance >= cls.HOT_MIN_IMPORTANCE:
            return "hot"
        return "cold"

    @staticmethod
    def _rows_to_dicts(rows):
        """Convert (id, content, metadata, importance, created_at) rows to dicts."""
        return [
            {
                'id': row[0],
                'content': row[1],
                'metadata': row[2],
                'importance': row[3],
                'created_at': row[4],
            }
            for row in rows
        ]

    def add_with_tier(self, content: str, importance: int = 3,
                      tags: list = None, auto_archive: bool = True):
        """Store a memory and tag it with the tier derived from *importance*.

        Args:
            content: Text of the memory.
            importance: Score used to choose the tier (>= 4 core, >= 2 hot,
                otherwise cold).
            tags: Optional list of tags stored in the memory's metadata.
            auto_archive: Flag stored in the metadata as ``auto_archive``.

        Returns:
            The id returned by the wrapped system's ``add_memory()``.
        """
        metadata = {
            'tags': tags or [],
            'importance': importance,
            'auto_archive': auto_archive,
        }

        memory_id = self.vm.add_memory(
            content=content,
            metadata=metadata,
            importance=importance,
        )

        self.conn.execute(
            "UPDATE memories SET tier=? WHERE id=?",
            (self._tier_for(importance), memory_id),
        )
        self.conn.commit()

        return memory_id

    def get_recent_memories(self, days: int = 7, limit: int = 20):
        """Return up to *limit* memories, newest first.

        NOTE(review): *days* is accepted but not applied to the query.
        Callers in this code base use this method to list memories without
        an age filter, so the behaviour is kept; confirm before filtering.

        Returns:
            A list of dicts with keys ``id``, ``content``, ``metadata``
            (as stored in SQLite), ``importance`` and ``created_at``.
        """
        cursor = self.conn.execute(
            """
            SELECT id, content, metadata, importance, created_at
            FROM memories
            ORDER BY created_at DESC
            LIMIT ?
            """,
            (limit,),
        )
        return self._rows_to_dicts(cursor.fetchall())

    def get_core_memories(self):
        """Return every memory whose importance is at least 4, newest first."""
        cursor = self.conn.execute(
            """
            SELECT id, content, metadata, importance, created_at
            FROM memories
            WHERE importance >= ?
            ORDER BY created_at DESC
            """,
            (self.CORE_MIN_IMPORTANCE,),
        )
        return self._rows_to_dicts(cursor.fetchall())

    def migrate_old_memories(self, hot_days: int = 30):
        """Count non-core memories older than *hot_days* days.

        No archiving is performed yet; the return value is the number of
        memories that qualify for it.

        The cutoff is computed by SQLite (``datetime('now', ...)``) so it is
        in UTC with the same text format as the ``CURRENT_TIMESTAMP`` values
        stored in ``created_at``. Every memory below the core threshold
        qualifies, which includes the default importance of 3.
        """
        cursor = self.conn.execute(
            """
            SELECT COUNT(*)
            FROM memories
            WHERE importance < ?
            AND created_at < datetime('now', ?)
            """,
            (self.CORE_MIN_IMPORTANCE, f"-{int(hot_days)} days"),
        )
        # TODO: implement the actual archiving (write to file, compress, ...).
        return cursor.fetchone()[0]
|
||||
@@ -0,0 +1,77 @@
|
||||
# openclaw_integration.py - OpenClaw 集成接口
|
||||
# 提供单例模式的记忆系统访问
|
||||
|
||||
from vector_memory import VectorMemorySystem
|
||||
from memory_tier_manager import MemoryTierManager
|
||||
import os
|
||||
|
||||
# 初始化(单例模式)
|
||||
_memory_system = None
|
||||
_tier_manager = None
|
||||
|
||||
|
||||
def get_memory_system():
    """Return the module-wide VectorMemorySystem, building it on first use.

    The API key is read from the SILICONFLOW_API_KEY environment variable
    and the store is opened under ``./data/memory``.

    Raises:
        ValueError: if SILICONFLOW_API_KEY is unset or empty.
    """
    global _memory_system

    # Reuse the instance created by an earlier call.
    if _memory_system is not None:
        return _memory_system

    key = os.getenv("SILICONFLOW_API_KEY")
    if not key:
        raise ValueError("请设置 SILICONFLOW_API_KEY 环境变量")

    _memory_system = VectorMemorySystem(persist_dir="./data/memory", api_key=key)
    return _memory_system
|
||||
|
||||
|
||||
def get_tier_manager():
    """Return the module-wide MemoryTierManager, building it on first use.

    The manager wraps the instance returned by get_memory_system().
    """
    global _tier_manager

    # Reuse the manager created by an earlier call.
    if _tier_manager is not None:
        return _tier_manager

    _tier_manager = MemoryTierManager(get_memory_system())
    return _tier_manager
|
||||
|
||||
|
||||
def search_memory(query: str, top_k: int = 5):
    """Search stored memories for *query*; entry point for OpenClaw.

    Delegates to ``search(query, top_k)`` on the shared memory system and
    returns its result unchanged.
    """
    memory_system = get_memory_system()
    hits = memory_system.search(query, top_k)
    return hits
|
||||
|
||||
|
||||
def add_memory(content: str, importance: int = 3, tags: list = None):
    """Store a memory through the tier manager; entry point for OpenClaw.

    Delegates to ``add_with_tier(content, importance, tags)`` on the shared
    tier manager and returns its result unchanged.
    """
    manager = get_tier_manager()
    memory_id = manager.add_with_tier(content, importance, tags)
    return memory_id
|
||||
|
||||
|
||||
def get_all_memories(limit: int = 50):
    """Return up to *limit* memories from the shared tier manager.

    Delegates to ``get_recent_memories(limit=limit)`` and returns its
    result unchanged.
    """
    manager = get_tier_manager()
    memories = manager.get_recent_memories(limit=limit)
    return memories
|
||||
|
||||
|
||||
def get_core_memories():
    """Return the core memories reported by the shared tier manager."""
    manager = get_tier_manager()
    core = manager.get_core_memories()
    return core
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
    # Usage example: store one sample memory ...
    add_memory(
        content="2026-03-21: 部署了向量记忆系统,采用硅基流动 BGE-M3 + Chroma + SQLite 架构",
        importance=4,
        tags=["向量记忆", "系统部署", "硅基流动"],
    )

    # ... then search for it and print a 50-character preview of each hit.
    for hit in search_memory("记忆系统"):
        preview = hit['content'][:50]
        similarity = 1 - hit['distance']
        print(f"- {preview}... (相似度: {similarity:.2%})")
|
||||
@@ -0,0 +1,148 @@
|
||||
# vector_memory.py - 向量存储引擎
|
||||
# BGE-M3 + Chroma + SQLite 架构
|
||||
|
||||
import chromadb
|
||||
from chromadb.config import Settings
|
||||
from openai import OpenAI
|
||||
import sqlite3
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class VectorMemorySystem:
    """Vector memory store: BGE-M3 embeddings + Chroma index + SQLite text.

    Each memory is embedded through the SiliconFlow OpenAI-compatible API,
    indexed in a Chroma collection, and its raw text and metadata are kept
    in a SQLite ``memories`` table.
    """

    def __init__(self, persist_dir="./data", api_key: str = None):
        """Open (or create) the store under *persist_dir*.

        Args:
            persist_dir: Directory holding ``memory.db`` and the ``chroma/``
                index. It is created when missing.
            api_key: SiliconFlow API key used for embedding requests.
        """
        # Local import: the module header does not import os.
        import os

        persist_dir = os.path.expanduser(persist_dir)
        # sqlite3.connect() cannot create missing parent directories.
        os.makedirs(persist_dir, exist_ok=True)

        # 1. SiliconFlow client (OpenAI-compatible endpoint).
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://api.siliconflow.cn/v1"
        )

        # 2. Chroma vector index. PersistentClient writes the index to disk;
        #    a plain Client(Settings(persist_directory=...)) stays in memory.
        self.chroma = chromadb.PersistentClient(
            path=os.path.join(persist_dir, "chroma"),
            settings=Settings(anonymized_telemetry=False),
        )
        self.collection = self.chroma.get_or_create_collection(
            name="openclaw_memory",
            metadata={"description": "OpenClaw long-term memory"}
        )

        # 3. SQLite store for the original text.
        self.db_path = os.path.join(persist_dir, "memory.db")
        self._init_sqlite()

    def _init_sqlite(self):
        """Connect to ``self.db_path`` and create the table and indexes."""
        self.conn = sqlite3.connect(self.db_path)
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS memories (
                id TEXT PRIMARY KEY,
                content TEXT NOT NULL,
                metadata TEXT,
                importance INTEGER DEFAULT 3,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        self.conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_importance ON memories(importance)
        """)
        self.conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_created_at ON memories(created_at)
        """)
        self.conn.commit()

    def _get_embedding(self, text: str) -> list:
        """Return the BAAI/bge-m3 embedding of *text* from the API."""
        response = self.client.embeddings.create(
            model="BAAI/bge-m3",
            input=text
        )
        return response.data[0].embedding

    @staticmethod
    def _chroma_metadata(metadata):
        """Return *metadata* in a form Chroma accepts, or None when empty.

        Chroma documents str/int/float/bool metadata values only, so any
        other value (e.g. a list of tags) is stored as its JSON text.
        """
        if not metadata:
            return None
        return {
            key: value if isinstance(value, (str, int, float, bool))
            else json.dumps(value, ensure_ascii=False)
            for key, value in metadata.items()
        }

    def add_memory(self, content: str, metadata: dict = None, importance: int = 3):
        """Store a memory in both the vector index and SQLite.

        Args:
            content: Text of the memory.
            metadata: Optional dict; saved as JSON in SQLite and, with
                non-scalar values JSON-encoded, in Chroma.
            importance: Integer score saved in the ``importance`` column.

        Returns:
            The generated UUID string identifying the memory.
        """
        import uuid
        memory_id = str(uuid.uuid4())

        # 1. Embed and index. Empty metadata is passed as None rather than {}.
        embedding = self._get_embedding(content)
        chroma_meta = self._chroma_metadata(metadata)
        self.collection.add(
            ids=[memory_id],
            embeddings=[embedding],
            documents=[content],
            metadatas=[chroma_meta] if chroma_meta else None,
        )

        # 2. Persist the original text and metadata.
        self.conn.execute(
            """INSERT INTO memories (id, content, metadata, importance)
               VALUES (?, ?, ?, ?)""",
            (memory_id, content, json.dumps(metadata), importance)
        )
        self.conn.commit()

        return memory_id

    def search(self, query: str, top_k: int = 5) -> list:
        """Return up to *top_k* memories closest to *query*.

        Returns:
            A list of dicts with keys ``id``, ``content``, ``distance`` and
            ``metadata``, in the order Chroma returns them. The list is
            empty when the collection is empty or *top_k* is not positive.
        """
        if top_k <= 0:
            return []
        # Skip the embedding request when there is nothing to search, and
        # never ask for more results than the collection holds.
        available = self.collection.count()
        if available == 0:
            return []

        query_embedding = self._get_embedding(query)
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=min(top_k, available)
        )

        return [
            {
                'id': memory_id,
                'content': doc,
                'distance': distance,
                'metadata': meta,
            }
            for memory_id, doc, distance, meta in zip(
                results['ids'][0],
                results['documents'][0],
                results['distances'][0],
                results['metadatas'][0],
            )
        ]

    def hybrid_search(self, query: str, keyword: str = None, top_k: int = 5):
        """Semantic search with an optional substring filter.

        Fetches ``top_k * 2`` semantic hits, keeps those whose content
        contains *keyword* (when given), and returns the first *top_k*.
        """
        vector_results = self.search(query, top_k * 2)

        if keyword:
            vector_results = [
                r for r in vector_results
                if keyword in r['content']
            ]

        return vector_results[:top_k]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: requires SILICONFLOW_API_KEY and network access.
    import os

    api_key = os.getenv("SILICONFLOW_API_KEY")
    if not api_key:
        print("请设置 SILICONFLOW_API_KEY 环境变量")
        # exit() is only injected by the site module; SystemExit always works.
        raise SystemExit(1)

    vm = VectorMemorySystem(persist_dir="./data/memory", api_key=api_key)

    # Add one sample memory.
    memory_id = vm.add_memory(
        content="2026-03-21: 部署了向量记忆系统",
        metadata={"tags": ["系统部署"]},
        importance=4
    )
    print(f"添加记忆成功: {memory_id}")

    # Search for it and print a 50-character preview of each hit.
    results = vm.search("记忆系统")
    for r in results:
        print(f"- {r['content'][:50]}... (相似度: {1-r['distance']:.2%})")
|
||||
Reference in New Issue
Block a user