向量数据库与记忆系统:为 AI 赋予长期记忆能力


文档摘要

向量数据库与记忆系统:为 AI 赋予长期记忆能力 AI 记忆系统概述 为什么需要记忆? LLM 的局限性: 上下文窗口有限 无法跨对话持久化信息 无法从海量数据中快速检索 记忆系统的价值: 持久化知识 快速检索 跨会话记忆 个性化体验 记忆层级 情景记忆(Episodic Memory) 语义记忆(Semantic Memory) 程序记忆(Procedural Memory) 向量数据库 Chroma Pinecone Weaviate 记忆检索 语义搜索 混合检索 记忆管理 记忆巩固 记忆遗忘 实战案例 个人助理 知识库问答 性能优化 增量索引 缓存策略 最佳实践 分层存储:热数据在内存,温数据在 Redis,冷数据在磁盘 定期维护:巩固重要记忆,遗忘无关记忆

向量数据库与记忆系统:为 AI 赋予长期记忆能力

AI 记忆系统概述

为什么需要记忆?

LLM 的局限性

  • 上下文窗口有限
  • 无法跨对话持久化信息
  • 无法从海量数据中快速检索

记忆系统的价值

  • 持久化知识
  • 快速检索
  • 跨会话记忆
  • 个性化体验

记忆层级

1. 情景记忆(Episodic Memory)

import time

import numpy as np


class EpisodicMemory:
    """Episodic memory: stores concrete events and experiences.

    Each episode is kept in ``self.episodes`` as a dict with the keys
    ``timestamp``, ``content`` and ``embeddings``; the list is ordered from
    oldest to newest.

    NOTE(review): ``self.embed`` and ``self.cosine_similarity`` are called
    here but are not defined in this snippet -- they have to be supplied by a
    subclass or mixin.
    """

    def __init__(self, max_episodes=1000):
        self.episodes = []
        self.max_episodes = max_episodes

    def add_episode(self, episode):
        """Append an episode and evict the oldest ones beyond ``max_episodes``."""
        self.episodes.append({
            'timestamp': time.time(),
            'content': episode,
            'embeddings': self.embed(episode),
        })

        # Trim from the front (oldest first). Removing the whole excess, not
        # just one item, keeps the cap correct even if ``max_episodes`` was
        # lowered after episodes were stored.
        excess = len(self.episodes) - self.max_episodes
        if excess > 0:
            del self.episodes[:excess]

    def retrieve(self, query, top_k=5):
        """Return up to ``top_k`` episodes, most similar to ``query`` first.

        Returns an empty list when ``top_k`` is not positive or nothing has
        been stored yet (the query is not embedded in that case).
        """
        # Guard: with top_k == 0 the slice ``[-0:]`` below would select every
        # episode instead of none.
        if top_k <= 0 or not self.episodes:
            return []

        query_embedding = self.embed(query)
        scores = [
            self.cosine_similarity(query_embedding, ep['embeddings'])
            for ep in self.episodes
        ]

        # argsort is ascending: take the last top_k and reverse for best-first.
        top_indices = np.argsort(scores)[-top_k:][::-1]
        return [self.episodes[i] for i in top_indices]

2. 语义记忆(Semantic Memory)

class SemanticMemory:
    """Semantic memory: stores general knowledge and concepts.

    A thin wrapper around ``vector_store``, which must offer
    ``add(embedding=..., metadata=...)`` and ``search(embedding=..., top_k=...)``.

    NOTE(review): ``self.embed`` is called but not defined in this snippet --
    it has to be supplied by a subclass or mixin.
    """

    def __init__(self, vector_store):
        self.vector_store = vector_store

    def add_knowledge(self, knowledge, metadata=None):
        """Embed ``knowledge`` and store it together with its metadata.

        ``metadata`` is optional. The stored metadata always carries the text
        under the ``'knowledge'`` key; because caller metadata is merged last,
        a caller-supplied ``'knowledge'`` key overrides it.
        """
        embedding = self.embed(knowledge)
        self.vector_store.add(
            embedding=embedding,
            metadata={'knowledge': knowledge, **(metadata or {})},
        )

    def search(self, query, top_k=5):
        """Embed ``query`` and return whatever the store's ``search`` yields."""
        query_embedding = self.embed(query)
        return self.vector_store.search(
            embedding=query_embedding,
            top_k=top_k,
        )

3. 程序记忆(Procedural Memory)

class ProceduralMemory:
    """Procedural memory: stores skills and operating procedures.

    Skills live in ``self.skills``, keyed by name; each entry holds the
    skill's ``steps`` and the ``embeddings`` of its name.

    NOTE(review): ``self.embed`` and ``self.run_steps`` are called but not
    defined in this snippet -- they have to be supplied by a subclass or mixin.
    """

    def __init__(self):
        self.skills = {}

    def add_skill(self, skill_name, skill_steps):
        """Register a skill; an existing skill of the same name is replaced."""
        entry = {
            'steps': skill_steps,
            'embeddings': self.embed(skill_name),
        }
        self.skills[skill_name] = entry

    def execute_skill(self, skill_name, inputs):
        """Run a registered skill's steps on ``inputs``.

        Raises:
            ValueError: if ``skill_name`` has not been registered.
        """
        if skill_name in self.skills:
            steps = self.skills[skill_name]['steps']
            return self.run_steps(steps, inputs)

        raise ValueError(f"Unknown skill: {skill_name}")

向量数据库

1. Chroma

import chromadb

# Create a client.
client = chromadb.Client()

# Fetch the collection, creating it on first use. Unlike create_collection,
# this does not fail when "documents" already exists, so the snippet can be
# re-run (e.g. in a notebook).
collection = client.get_or_create_collection("documents")

# Insert the documents. upsert (rather than add) keeps a re-run from
# colliding with the ids written by the previous run.
collection.upsert(
    documents=["AI 是人工智能", "机器学习是 AI 的子集"],
    metadatas=[{"source": "textbook"}, {"source": "blog"}],
    ids=["doc1", "doc2"],
)

# Query by text and ask for the two closest documents.
results = collection.query(
    query_texts=["什么是人工智能"],
    n_results=2,
)
print(results)

2. Pinecone

import pinecone

# NOTE(review): pinecone.init / pinecone.list_indexes / pinecone.Index are the
# module-level API of the older (pre-3.0) pinecone-client; newer releases use
# a `Pinecone` client object instead -- confirm against the installed version.

# Initialise the client. "your-api-key" is a placeholder; load the real key
# from configuration rather than committing it.
pinecone.init(
    api_key="your-api-key",
    environment="us-west1-gcp",
)

# Create the index only if it does not exist yet.
index_name = "ai-memory"
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        name=index_name,
        dimension=1536,  # OpenAI embedding dimension
        metric="cosine",
    )

# Connect to the index.
index = pinecone.Index(index_name)

# Upsert vectors as (id, values, metadata) tuples.
vectors = [
    ("vec1", [0.1] * 1536, {"text": "AI 技术"}),
    ("vec2", [0.2] * 1536, {"text": "机器学习"}),
]
index.upsert(vectors=vectors)

# Query. include_metadata=True is needed to get the stored "text" back with
# each match; without it only ids and scores are returned.
query_vector = [0.15] * 1536
results = index.query(
    vector=query_vector,
    top_k=5,
    include_metadata=True,
)

3. Weaviate

import weaviate

# NOTE(review): weaviate.Client with client.schema / client.data_object /
# client.query is the v3 Python client API -- confirm the installed version.

# Connect to a local Weaviate instance.
client = weaviate.Client("http://localhost:8080")

# Create the class. "vectorizer": "none" tells Weaviate that vectors are
# supplied by the caller instead of being computed by a vectorizer module.
client.schema.create_class({
    "class": "Document",
    "vectorizer": "none",
    "properties": [
        {"name": "content", "dataType": ["text"]},
        {"name": "embedding", "dataType": ["number[]"]},
    ],
})

# NOTE(review): `embedding` is not defined in this snippet; it is presumably
# a NumPy vector computed earlier (it is only used through .tolist()).
vector = embedding.tolist()

# Add an object. The vector must be passed through the `vector` argument so
# that it becomes the object's search vector; storing it only as the
# "embedding" property would leave it invisible to near-vector search.
data_object = {
    "content": "向量数据库用于 AI 记忆",
    "embedding": vector,
}
client.data_object.create(
    data_object=data_object,
    class_name="Document",
    vector=vector,
)

# Vector search: the v3 client builds queries by chaining and runs them
# with .do().
results = (
    client.query
    .get("Document", ["content"])
    .with_near_vector({"vector": vector})
    .with_limit(5)
    .do()
)

记忆检索

1. 语义搜索

class MemoryRetriever:
    """Semantic retrieval over a vector store with optional LLM reranking.

    ``vector_store`` must offer ``search(embedding=..., top_k=...)`` and
    ``llm`` must offer ``generate(prompt)``.

    NOTE(review): ``self.embed``, ``self.format_results`` and
    ``self.parse_ranking`` are called but not defined in this snippet -- they
    have to be supplied by a subclass or mixin.
    """

    def __init__(self, vector_store, llm):
        self.vector_store = vector_store
        self.llm = llm

    def retrieve(self, query, top_k=5, use_rerank=True):
        """Return memories relevant to ``query``.

        Args:
            query: text to search for.
            top_k: number of candidates requested from the vector store.
            use_rerank: when False, return the raw vector-search results
                without calling the LLM.
        """
        # Embed the query, then run the vector search.
        query_embedding = self.embed(query)
        results = self.vector_store.search(
            embedding=query_embedding,
            top_k=top_k,
        )

        # Reranking is optional, and pointless with fewer than two
        # candidates -- skip the LLM round-trip in both cases.
        # Assumes the store returns a sized sequence (supports len()).
        if not use_rerank or len(results) < 2:
            return results

        return self.rerank(query, results)

    def rerank(self, query, results):
        """Ask the LLM to order ``results`` by relevance to ``query``."""
        prompt = (
            "\n"
            f"Query: {query}\n"
            "\n"
            "Candidates:\n"
            f"{self.format_results(results)}\n"
            "\n"
            "Please rank these candidates by relevance to the query.\n"
            "Return only the ranked indices.\n"
        )

        response = self.llm.generate(prompt)
        return self.parse_ranking(response, results)

2. 混合检索

class HybridRetriever:
    """Hybrid retrieval: vector search plus keyword search, fused by rank.

    NOTE(review): ``VectorRetriever`` and ``KeywordRetriever`` are not defined
    in this snippet; each is expected to expose ``retrieve(query, top_k=...)``
    returning an ordered sequence of dicts that carry an ``'id'`` key.
    """

    def __init__(self, vector_store, keyword_index, llm):
        self.vector_retriever = VectorRetriever(vector_store)
        self.keyword_retriever = KeywordRetriever(keyword_index)
        self.llm = llm

    def retrieve(self, query, top_k=5):
        """Return the ids of the best ``top_k`` documents for ``query``."""
        # Over-fetch from both sources so the fusion step has enough overlap
        # to work with.
        candidate_k = top_k * 2
        vector_results = self.vector_retriever.retrieve(query, top_k=candidate_k)
        keyword_results = self.keyword_retriever.retrieve(query, top_k=candidate_k)

        combined = self.merge_results(vector_results, keyword_results)
        return self.rerank(query, combined, top_k=top_k)

    def merge_results(self, vector_results, keyword_results,
                      vector_weight=0.5, keyword_weight=0.5):
        """Fuse two ranked result lists into one list of document ids.

        A document at 0-based position ``i`` in a list of length ``n``
        contributes ``(1 - i / n) * weight``; contributions are summed per
        ``'id'`` and the ids are returned by descending total. Ids with equal
        totals keep first-seen order.

        Args:
            vector_results: ranked dicts with an ``'id'`` key.
            keyword_results: ranked dicts with an ``'id'`` key.
            vector_weight: weight applied to the vector ranking.
            keyword_weight: weight applied to the keyword ranking.
        """
        scores = {}
        weighted_sources = (
            (vector_results, vector_weight),
            (keyword_results, keyword_weight),
        )
        for results, weight in weighted_sources:
            total = len(results)
            for rank, result in enumerate(results):
                doc_id = result['id']
                scores[doc_id] = scores.get(doc_id, 0) + (1 - rank / total) * weight

        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        return [doc_id for doc_id, _ in ranked]

    def rerank(self, query, candidates, top_k=5):
        """Fallback rerank: keep the fused order and truncate to ``top_k``.

        ``retrieve`` needs this hook to exist. This default performs no
        LLM-based reordering (``query`` is unused); override it to add one.
        """
        return candidates[:top_k]

记忆管理

1. 记忆巩固

class MemoryConsolidation:
    """Memory consolidation: merges groups of similar memories into summaries.

    ``vector_store`` must offer ``get_all()``, ``delete(id)`` and
    ``add(summary, id=...)``; memories are dicts carrying an ``'id'`` key.

    NOTE(review): ``self.summarize`` and ``self.similarity`` are called but
    not defined in this snippet -- they have to be supplied by a subclass or
    mixin.
    """

    def __init__(self, vector_store, llm):
        self.vector_store = vector_store
        self.llm = llm

    def consolidate(self, threshold=0.9):
        """Merge every cluster of similar memories into one summary entry.

        Args:
            threshold: similarity a memory must exceed to join a cluster;
                passed through to ``cluster_similar``.
        """
        all_memories = self.vector_store.get_all()
        clusters = self.cluster_similar(all_memories, threshold=threshold)

        for cluster_id, memories in clusters.items():
            # A single-member cluster has nothing to merge with.
            if len(memories) < 2:
                continue

            summary = self.summarize(memories)

            # Drop every member except the one whose id names the cluster,
            # then write the summary under that id.
            # NOTE(review): the entry stored under cluster_id is not deleted
            # first -- presumably add() overwrites by id; confirm.
            for memory in memories:
                if memory['id'] != cluster_id:
                    self.vector_store.delete(memory['id'])
            self.vector_store.add(summary, id=cluster_id)

    def cluster_similar(self, memories, threshold=0.9):
        """Greedily group memories by similarity to each cluster's first member.

        Returns a dict mapping the id of a cluster's first member to the list
        of memories in that cluster. A memory joins the first cluster whose
        first member it resembles by more than ``threshold``; otherwise it
        starts a new cluster.
        """
        clusters = {}
        for memory in memories:
            for members in clusters.values():
                if self.similarity(memory, members[0]) > threshold:
                    members.append(memory)
                    break
            else:
                # No existing cluster matched: start a new one.
                clusters[memory['id']] = [memory]
        return clusters

2. 记忆遗忘

import time


class MemoryForgetting:
    """Memory forgetting: prunes stale or rarely used memories from a store.

    ``vector_store`` must offer ``get_all()`` and ``delete(id)``; memories are
    dicts with an ``'id'`` key and optional ``'timestamp'``,
    ``'access_count'`` and ``'importance'`` keys.
    """

    def __init__(self, vector_store):
        self.vector_store = vector_store

    def forget_old_memories(self, days=30, importance_threshold=0.5):
        """Delete memories older than ``days`` whose importance is low.

        A memory is deleted when its ``'timestamp'`` lies before the cutoff
        and its ``'importance'`` (0 when missing) is below
        ``importance_threshold``. Memories without a timestamp are skipped
        because their age cannot be determined.

        Returns:
            The number of memories deleted.
        """
        cutoff_time = time.time() - (days * 24 * 3600)
        deleted = 0

        # Iterate over a snapshot so deletions cannot disturb the iteration
        # if the store hands back a live collection.
        for memory in list(self.vector_store.get_all()):
            timestamp = memory.get('timestamp')
            if timestamp is None or timestamp >= cutoff_time:
                continue
            if memory.get('importance', 0) < importance_threshold:
                self.vector_store.delete(memory['id'])
                deleted += 1
        return deleted

    def forget_less_accessed(self, access_threshold=10, importance_threshold=0.7):
        """Delete rarely accessed memories whose importance is low.

        A memory is deleted when its ``'access_count'`` (0 when missing, the
        same default convention as ``'importance'``) is below
        ``access_threshold`` and its ``'importance'`` (0 when missing) is
        below ``importance_threshold``.

        Returns:
            The number of memories deleted.
        """
        deleted = 0
        for memory in list(self.vector_store.get_all()):
            rarely_used = memory.get('access_count', 0) < access_threshold
            unimportant = memory.get('importance', 0) < importance_threshold
            if rarely_used and unimportant:
                self.vector_store.delete(memory['id'])
                deleted += 1
        return deleted

实战案例

1. 个人助理

class PersonalAssistant:
    """Chat assistant backed by episodic, semantic and procedural memory.

    NOTE(review): ``self.build_prompt`` is called but not defined in this
    snippet -- it has to be supplied by a subclass or mixin.
    """

    def __init__(self, llm, memory_store):
        self.llm = llm
        self.episodic_memory = EpisodicMemory()
        self.semantic_memory = SemanticMemory(memory_store)
        self.procedural_memory = ProceduralMemory()

    def chat(self, user_input):
        """Answer ``user_input`` and record the exchange as a new episode."""
        # Pull in relevant memories and build the prompt from them.
        relevant_memories = self.retrieve_memories(user_input)
        prompt = self.build_prompt(user_input, relevant_memories)

        response = self.llm.generate(prompt)

        # Remember this exchange for later conversations.
        self.episodic_memory.add_episode({
            'user_input': user_input,
            'response': response,
        })
        return response

    def retrieve_memories(self, query, top_k=5):
        """Return up to ``top_k`` memories drawn from both memory types.

        Episodic and semantic hits are interleaved (best of each first)
        before truncating. Simply concatenating and slicing would drop every
        semantic hit as soon as episodic memory alone returns ``top_k``
        results.
        """
        episodic = list(self.episodic_memory.retrieve(query, top_k=top_k))
        semantic = list(self.semantic_memory.search(query, top_k=top_k))

        merged = []
        for i in range(max(len(episodic), len(semantic))):
            # One-element slices are empty past the end of the shorter list.
            merged.extend(episodic[i:i + 1])
            merged.extend(semantic[i:i + 1])
        return merged[:top_k]

2. 知识库问答

class KnowledgeQA:
    """Knowledge-base question answering: retrieve contexts, then ask the LLM.

    NOTE(review): ``MemoryRetriever`` is not defined in this snippet; it is
    expected to expose ``retrieve(question, top_k=...)`` returning dicts that
    carry a ``'content'`` key.
    """

    def __init__(self, vector_store, llm):
        self.vector_store = vector_store
        self.llm = llm
        self.retriever = MemoryRetriever(vector_store, llm)

    def ask(self, question, top_k=3):
        """Answer ``question`` using the ``top_k`` most relevant contexts."""
        contexts = self.retriever.retrieve(question, top_k=top_k)

        prompt = (
            "\n"
            "Context:\n"
            f"{self.format_contexts(contexts)}\n"
            "\n"
            f"Question: {question}\n"
            "\n"
            "Answer:\n"
        )

        return self.llm.generate(prompt)

    def format_contexts(self, contexts):
        """Render the contexts as a bullet list, one ``- content`` per line."""
        return "\n".join(f"- {ctx['content']}" for ctx in contexts)

性能优化

1. 增量索引

import time


class IncrementalIndexer:
    """Buffers documents and indexes them into the vector store in batches.

    ``vector_store`` must offer ``add_batch(documents, embeddings)``.

    NOTE(review): ``self.embed_batch`` is called but not defined in this
    snippet -- it has to be supplied by a subclass or mixin.
    """

    def __init__(self, vector_store, batch_size=100):
        self.vector_store = vector_store
        self.batch_size = batch_size
        self.pending_documents = []

    def add_document(self, document):
        """Queue a document; flush once ``batch_size`` documents are pending."""
        self.pending_documents.append({
            'document': document,
            'timestamp': time.time(),
        })

        if len(self.pending_documents) >= self.batch_size:
            self.flush()

    def flush(self):
        """Embed and index every pending document, then clear the buffer.

        Does nothing when the buffer is empty, so it is safe to call at
        shutdown to index a final partial batch. The buffer is cleared only
        after the store call returns, so a failed batch stays queued.
        """
        if not self.pending_documents:
            return

        documents = [doc['document'] for doc in self.pending_documents]
        embeddings = self.embed_batch(documents)
        self.vector_store.add_batch(documents, embeddings)
        self.pending_documents = []

2. 缓存策略

from functools import lru_cache


class CachedRetriever:
    """Wraps a retriever with a bounded per-instance LRU cache.

    The cache is created per instance rather than by decorating the method:
    ``@lru_cache`` on a method stores ``self`` in a class-level cache, which
    keeps every instance alive and makes all instances share one size limit.

    Cached results are returned by reference, so callers must not mutate
    them. ``query`` and ``top_k`` must be hashable.
    """

    def __init__(self, retriever, maxsize=100):
        self.retriever = retriever
        # Bound to this instance, so the cache lives exactly as long as it.
        self._cached_fetch = lru_cache(maxsize=maxsize)(self._fetch)

    def _fetch(self, query, top_k):
        """Uncached lookup delegated to the wrapped retriever."""
        return self.retriever.retrieve(query, top_k)

    def retrieve(self, query, top_k=5):
        """Return the wrapped retriever's results for ``(query, top_k)``.

        Arguments are forwarded positionally so that ``retrieve(q)``,
        ``retrieve(q, 5)`` and ``retrieve(q, top_k=5)`` share one cache entry.
        """
        return self._cached_fetch(query, top_k)

最佳实践

  1. 分层存储:热数据在内存,温数据在 Redis,冷数据在磁盘
  2. 定期维护:巩固重要记忆,遗忘无关记忆
  3. 访问统计:记录访问频率,优化检索策略
  4. 并行处理:并行检索多个记忆库
  5. 隐私保护:敏感信息加密存储

总结

AI 记忆系统关键技术:

  1. 记忆类型:情景记忆、语义记忆、程序记忆
  2. 向量数据库:Chroma、Pinecone、Weaviate
  3. 检索策略:语义搜索、混合检索、重排序
  4. 记忆管理:巩固、遗忘、重要性评估
  5. 实际应用:个人助理、知识库问答、客服系统

为 AI 赋予记忆能力,让智能体更加强大!


发布者: 作者: 转发
评论区 (0)
U