backup: 2026-03-24 04:00

2026-03-24 04:00:48 +08:00
parent 7e143d3ebc
commit 31786dee08
193 changed files with 73520 additions and 1915 deletions
@@ -0,0 +1,85 @@
+---
+name: vector-memory
+description: |
+  向量语义记忆系统 - 为 OpenClaw 添加语义搜索能力。当用户需要：
+  (1) 部署向量记忆系统
+  (2) 开启语义搜索功能
+  (3) 安装配置 Chroma + BGE-M3
+  (4) 搜索记忆时找不到内容
+  (5) 需要比关键词搜索更智能的记忆检索
+---
+
+# Vector Memory Skill
+
+## 功能概述
+
+为 OpenClaw 添加**向量语义搜索**能力，解决纯 Markdown 记忆的搜索痛点：
+- 搜"股票"能找到"A股监控"、"铜陵有色"
+- 支持同义词、近义词理解
+- 记忆无限扩展，不受上下文窗口限制
+
+## 技术架构
+
+| 组件 | 选型 | 说明 |
+|------|------|------|
+| 向量模型 | BGE-M3 (硅基流动) | 中文优化好，向量免费 |
+| 向量数据库 | Chroma | 轻量，Python 原生 |
+| 持久化 | SQLite | 并发安全 |
+
+## 快速部署
+
+### 1. 安装依赖
+
+```bash
+mkdir -p ~/openclaw-memory-vector
+cd ~/openclaw-memory-vector
+pip install chromadb openai sqlalchemy
+```
+
+### 2. 配置 API Key
+
+```bash
+export SILICONFLOW_API_KEY="your_api_key"
+```
+
+### 3. 初始化系统
+
+```python
+import sys
+sys.path.insert(0, '~/openclaw-memory-vector/scripts')
+from vector_memory import VectorMemorySystem
+
+vm = VectorMemorySystem(
+    persist_dir="./data/memory",
+    api_key="your_api_key"
+)
+```
+
+## 核心脚本
+
+### scripts/vector_memory.py
+向量存储引擎，包含 `add_memory()` 和 `search()` 方法。详见 [references/core.md](references/core.md)。
+
+### scripts/memory_tier_manager.py
+记忆分层管理，自动将记忆分为 core/hot/cold 三层。
+
+### scripts/openclaw_integration.py
+OpenClaw 集成接口，提供 `get_memory_system()` 单例模式。
+
+## 数据备份
+
+备份 `~/openclaw-memory-vector/data/memory/` 整个目录：
+- `memory.db` - SQLite 数据库（原始文本）
+- `chroma/` - Chroma 向量索引
+
+## 成本
+
+- BGE-M3 向量：**免费无限**
+- 硅基流动大模型：2000万 Tokens/月
+- **总成本：≈ ¥0**
+
+## 触发词
+
+- "部署向量记忆"
+- "开启语义搜索"
+- "向量备份"
@@ -0,0 +1,71 @@
+# 向量记忆系统 - 核心模块详解
+
+## VectorMemorySystem 核心方法
+
+### add_memory(content, metadata, importance)
+同时写入向量库 + SQLite
+
+```python
+vm.add_memory(
+    content="用户喜欢喝不加糖的咖啡",
+    metadata={"category": "preference", "tags": ["咖啡", "口味"]},
+    importance=4  # >=4 核心记忆
+)
+```
+
+### search(query, top_k)
+语义搜索，返回相似记忆
+
+```python
+results = vm.search("股票预警")
+# 返回: [{id, content, distance, metadata}, ...]
+```
+
+### hybrid_search(query, keyword, top_k)
+混合搜索：语义 + 关键词过滤
+
+```python
+results = vm.hybrid_search("铜陵", keyword="有色")
+```
+
+## MemoryTierManager 分层规则
+
+| 重要性 | 层级 | 说明 |
+|--------|------|------|
+| >= 4 | core | 永久记忆，不删除 |
+| 2-3 | hot | 常用记忆，30天后可归档 |
+| < 2 | cold | 冷记忆，自动归档 |
+
+## 数据存储位置
+
+```
+~/openclaw-memory-vector/data/memory/
+├── memory.db         # SQLite（所有记忆的原始文本）
+└── chroma/          # Chroma（向量索引）
+    ├── *.bin        # 向量数据
+    └── *.sqlite     # Chroma 元数据
+```
+
+## 备份与恢复
+
+### 备份
+```bash
+tar -czvf openclaw-memory-vector.tar.gz -C ~ openclaw-memory-vector/data/memory
+```
+
+### 恢复
+```bash
+tar -xzvf openclaw-memory-vector.tar.gz -C ~/
+```
+
+## 环境变量
+
+| 变量 | 说明 |
+|------|------|
+| SILICONFLOW_API_KEY | 硅基流动 API Key |
+
+## 成本估算
+
+- BGE-M3 向量模型：**免费无限**
+- 硅基流动大模型：2000万 Tokens/月
+- **总成本：≈ ¥0**
@@ -0,0 +1,99 @@
+# memory_tier_manager.py - 记忆分层管理器
+# 自动将记忆分为 core/hot/cold 三层
+
+import sqlite3
+from datetime import datetime, timedelta
+from vector_memory import VectorMemorySystem
+
+
+class MemoryTierManager:
+    """记忆分层管理器"""
+    
+    def __init__(self, vector_memory: VectorMemorySystem):
+        self.vm = vector_memory
+        self.conn = vector_memory.conn
+    
+    def add_with_tier(self, content: str, importance: int = 3, 
+                      tags: list = None, auto_archive: bool = True):
+        """自动分层添加记忆"""
+        metadata = {
+            'tags': tags or [],
+            'importance': importance,
+            'auto_archive': auto_archive
+        }
+        
+        memory_id = self.vm.add_memory(
+            content=content,
+            metadata=metadata,
+            importance=importance
+        )
+        
+        # 根据重要性自动分层
+        if importance >= 4:
+            tier = "core"
+        elif importance >= 2:
+            tier = "hot"
+        else:
+            tier = "cold"
+        
+        # 标记层级
+        self.conn.execute(
+            "UPDATE memories SET tier=? WHERE id=?",
+            (tier, memory_id)
+        )
+        self.conn.commit()
+        
+        return memory_id
+    
+    def get_recent_memories(self, days: int = 7, limit: int = 20):
+        """获取最近记忆"""
+        cursor = self.conn.execute("""
+            SELECT id, content, metadata, importance, created_at 
+            FROM memories 
+            ORDER BY created_at DESC 
+            LIMIT ?
+        """, (limit,))
+        
+        return [{
+            'id': row[0],
+            'content': row[1],
+            'metadata': row[2],
+            'importance': row[3],
+            'created_at': row[4]
+        } for row in cursor.fetchall()]
+    
+    def get_core_memories(self):
+        """获取核心记忆（重要性 >= 4）"""
+        cursor = self.conn.execute("""
+            SELECT id, content, metadata, importance, created_at 
+            FROM memories 
+            WHERE importance >= 4
+            ORDER BY created_at DESC
+        """)
+        
+        return [{
+            'id': row[0],
+            'content': row[1],
+            'metadata': row[2],
+            'importance': row[3],
+            'created_at': row[4]
+        } for row in cursor.fetchall()]
+    
+    def migrate_old_memories(self, hot_days: int = 30):
+        """迁移旧记忆到冷存储"""
+        cutoff = datetime.now() - timedelta(days=hot_days)
+        
+        # 找出需要归档的记忆
+        cursor = self.conn.execute("""
+            SELECT id, content, metadata 
+            FROM memories 
+            WHERE importance < 3
+            AND created_at < ?
+        """, (cutoff,))
+        
+        archived = 0
+        for row in cursor.fetchall():
+            # 可以在这里实现归档逻辑（如写入文件、压缩等）
+            archived += 1
+        
+        return archived
@@ -0,0 +1,77 @@
+# openclaw_integration.py - OpenClaw 集成接口
+# 提供单例模式的记忆系统访问
+
+from vector_memory import VectorMemorySystem
+from memory_tier_manager import MemoryTierManager
+import os
+
+# 初始化（单例模式）
+_memory_system = None
+_tier_manager = None
+
+
+def get_memory_system():
+    """获取记忆系统单例"""
+    global _memory_system
+    
+    if _memory_system is None:
+        api_key = os.getenv("SILICONFLOW_API_KEY")
+        if not api_key:
+            raise ValueError("请设置 SILICONFLOW_API_KEY 环境变量")
+        
+        _memory_system = VectorMemorySystem(
+            persist_dir="./data/memory",
+            api_key=api_key
+        )
+    
+    return _memory_system
+
+
+def get_tier_manager():
+    """获取分层管理器单例"""
+    global _tier_manager
+    
+    if _tier_manager is None:
+        vm = get_memory_system()
+        _tier_manager = MemoryTierManager(vm)
+    
+    return _tier_manager
+
+
+def search_memory(query: str, top_k: int = 5):
+    """搜索记忆 - 供 OpenClaw 调用"""
+    vm = get_memory_system()
+    return vm.search(query, top_k)
+
+
+def add_memory(content: str, importance: int = 3, tags: list = None):
+    """添加记忆 - 供 OpenClaw 调用"""
+    mtm = get_tier_manager()
+    return mtm.add_with_tier(content, importance, tags)
+
+
+def get_all_memories(limit: int = 50):
+    """获取所有记忆"""
+    mtm = get_tier_manager()
+    return mtm.get_recent_memories(limit=limit)
+
+
+def get_core_memories():
+    """获取核心记忆"""
+    mtm = get_tier_manager()
+    return mtm.get_core_memories()
+
+
+# 使用示例
+if __name__ == "__main__":
+    # 添加记忆
+    add_memory(
+        content="2026-03-21: 部署了向量记忆系统，采用硅基流动 BGE-M3 + Chroma + SQLite 架构",
+        importance=4,
+        tags=["向量记忆", "系统部署", "硅基流动"]
+    )
+    
+    # 搜索记忆
+    results = search_memory("记忆系统")
+    for r in results:
+        print(f"- {r['content'][:50]}... (相似度: {1-r['distance']:.2%})")
@@ -0,0 +1,148 @@
+# vector_memory.py - 向量存储引擎
+# BGE-M3 + Chroma + SQLite 架构
+
+import chromadb
+from chromadb.config import Settings
+from openai import OpenAI
+import sqlite3
+import json
+from datetime import datetime
+
+
+class VectorMemorySystem:
+    def __init__(self, persist_dir="./data", api_key: str = None):
+        """初始化向量记忆系统"""
+        
+        # 1. 初始化硅基流动客户端
+        self.client = OpenAI(
+            api_key=api_key,
+            base_url="https://api.siliconflow.cn/v1"
+        )
+        
+        # 2. 初始化 Chroma 向量库
+        self.chroma = chromadb.Client(Settings(
+            persist_directory=persist_dir,
+            anonymized_telemetry=False
+        ))
+        self.collection = self.chroma.get_or_create_collection(
+            name="openclaw_memory",
+            metadata={"description": "OpenClaw long-term memory"}
+        )
+        
+        # 3. 初始化 SQLite（用于持久化）
+        self.db_path = f"{persist_dir}/memory.db"
+        self._init_sqlite()
+    
+    def _init_sqlite(self):
+        """初始化 SQLite 数据库"""
+        self.conn = sqlite3.connect(self.db_path)
+        self.conn.execute("""
+            CREATE TABLE IF NOT EXISTS memories (
+                id TEXT PRIMARY KEY,
+                content TEXT NOT NULL,
+                metadata TEXT,
+                importance INTEGER DEFAULT 3,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        """)
+        self.conn.execute("""
+            CREATE INDEX IF NOT EXISTS idx_importance ON memories(importance)
+        """)
+        self.conn.execute("""
+            CREATE INDEX IF NOT EXISTS idx_created_at ON memories(created_at)
+        """)
+        self.conn.commit()
+    
+    def _get_embedding(self, text: str) -> list:
+        """调用 BGE-M3 获取向量"""
+        response = self.client.embeddings.create(
+            model="BAAI/bge-m3",
+            input=text
+        )
+        return response.data[0].embedding
+    
+    def add_memory(self, content: str, metadata: dict = None, importance: int = 3):
+        """添加记忆（同时写入向量库 + SQLite）"""
+        import uuid
+        memory_id = str(uuid.uuid4())
+        
+        # 1. 生成向量并存储
+        embedding = self._get_embedding(content)
+        self.collection.add(
+            ids=[memory_id],
+            embeddings=[embedding],
+            documents=[content],
+            metadatas=[metadata or {}]
+        )
+        
+        # 2. 写入 SQLite 持久化
+        self.conn.execute(
+            """INSERT INTO memories (id, content, metadata, importance) 
+               VALUES (?, ?, ?, ?)""",
+            (memory_id, content, json.dumps(metadata), importance)
+        )
+        self.conn.commit()
+        
+        return memory_id
+    
+    def search(self, query: str, top_k: int = 5) -> list:
+        """语义搜索"""
+        # 1. 查询向量
+        query_embedding = self._get_embedding(query)
+        
+        # 2. 向量相似度搜索
+        results = self.collection.query(
+            query_embeddings=[query_embedding],
+            n_results=top_k
+        )
+        
+        # 3. 格式化返回
+        memories = []
+        for i, doc in enumerate(results['documents'][0]):
+            memories.append({
+                'id': results['ids'][0][i],
+                'content': doc,
+                'distance': results['distances'][0][i],
+                'metadata': results['metadatas'][0][i]
+            })
+        
+        return memories
+    
+    def hybrid_search(self, query: str, keyword: str = None, top_k: int = 5):
+        """混合搜索：语义 + 关键词"""
+        # 1. 向量搜索
+        vector_results = self.search(query, top_k * 2)
+        
+        # 2. 关键词过滤（可选）
+        if keyword:
+            vector_results = [
+                r for r in vector_results 
+                if keyword in r['content']
+            ]
+        
+        return vector_results[:top_k]
+
+
+if __name__ == "__main__":
+    import os
+    
+    api_key = os.getenv("SILICONFLOW_API_KEY")
+    if not api_key:
+        print("请设置 SILICONFLOW_API_KEY 环境变量")
+        exit(1)
+    
+    vm = VectorMemorySystem(persist_dir="./data/memory", api_key=api_key)
+    
+    # 测试添加
+    memory_id = vm.add_memory(
+        content="2026-03-21: 部署了向量记忆系统",
+        metadata={"tags": ["系统部署"]},
+        importance=4
+    )
+    print(f"添加记忆成功: {memory_id}")
+    
+    # 测试搜索
+    results = vm.search("记忆系统")
+    for r in results:
+        print(f"- {r['content'][:50]}... (相似度: {1-r['distance']:.2%})")