January 21, 2026
8 min read
Standard chatbots forget everything after each session. Users have to re-explain context, repeat preferences, and start fresh every conversation. It's frustrating—and unnecessary.
Vector databases enable chatbots that remember. Not just the current conversation, but relevant information from all past interactions. This guide shows you how to build one.
GPT-4 and other language models have context windows—the amount of text they can consider at once. Even with 128K token contexts, you can't include every past conversation.
The solution: store conversations as embeddings and retrieve only what's relevant to the current query. This is Retrieval-Augmented Generation (RAG) applied to chat history.
User Message
↓
[Generate Embedding]
↓
[Search Memory] → Relevant Past Conversations
↓
[Build Context: System + Memory + Recent Messages]
↓
[Call LLM]
↓
[Store New Exchange in Memory]
↓
Response to User
Each memory entry contains:
// Shape of one stored memory record (illustrative only — not runnable JS;
// `'user' | 'assistant'` below is type notation, not a value expression).
{
id: 'unique-id',
userId: 'user-123',
sessionId: 'session-456',
role: 'user' | 'assistant',
content: 'The actual message text',
embedding: [0.1, 0.2, ...], // Vector representation (1536 floats for text-embedding-3-small)
timestamp: '2024-01-15T10:30:00Z', // ISO 8601, used for recency sorting and pruning
metadata: {
topic: 'product-support',
sentiment: 'positive',
// Any other useful tags — these become filterable fields in the vector DB
}
}
const { Pinecone } = require('@pinecone-database/pinecone');
const pinecone = new Pinecone({ apiKey: process.env.PINECONE_API_KEY });

/**
 * Ensure the 'chat-memory' index exists, creating it if needed, and
 * return a handle to it.
 *
 * BUG FIX: in the v2+ Pinecone SDK, `listIndexes()` resolves to
 * `{ indexes: [{ name, ... }] }` — an object, not an array of names —
 * so `indexes.includes('chat-memory')` was always false and createIndex
 * re-ran (and failed) on every startup.
 *
 * @returns {Promise<object>} Pinecone index handle for 'chat-memory'.
 */
async function initializeMemoryIndex() {
  const listed = await pinecone.listIndexes();
  // Tolerate both the old (array of names) and new ({ indexes: [...] }) shapes.
  const names = (listed.indexes ?? listed ?? []).map((i) => i.name ?? i);
  if (!names.includes('chat-memory')) {
    await pinecone.createIndex({
      name: 'chat-memory',
      dimension: 1536, // must match text-embedding-3-small output size
      metric: 'cosine',
      // NOTE(review): serverless projects also require a `spec` field here —
      // confirm against your Pinecone plan before deploying.
    });
  }
  return pinecone.index('chat-memory');
}
-- pgvector (PostgreSQL) alternative to Pinecone: one row per stored message.
CREATE TABLE chat_memory (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id TEXT NOT NULL,
session_id TEXT NOT NULL,
role TEXT NOT NULL, -- 'user' | 'assistant' | 'summary'
content TEXT NOT NULL,
embedding vector(1536), -- matches text-embedding-3-small dimensionality
timestamp TIMESTAMPTZ DEFAULT now(),
metadata JSONB
);
-- Approximate-nearest-neighbor index for cosine-distance search.
-- NOTE(review): ivfflat should have its `lists` parameter tuned to table
-- size (default may be slow/inaccurate at scale) — confirm for your data volume.
CREATE INDEX ON chat_memory USING ivfflat (embedding vector_cosine_ops);
-- Supports the per-user filter applied by every retrieval query.
CREATE INDEX ON chat_memory (user_id);
const OpenAI = require('openai');
const { randomUUID } = require('node:crypto');
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

/**
 * Write path of the memory layer: embeds message text and upserts it
 * (with filterable metadata) into the vector index.
 */
class MemoryStore {
  /** @param {object} vectorIndex - Pinecone index handle (see initializeMemoryIndex). */
  constructor(vectorIndex) {
    this.index = vectorIndex;
  }

  /**
   * Embed one message and store it as a memory record.
   * @param {string} userId
   * @param {string} sessionId
   * @param {string} role - 'user' | 'assistant' | 'summary'
   * @param {string} content - raw message text (also kept in metadata for retrieval)
   * @param {object} [metadata={}] - extra filterable tags (e.g. memoryType)
   */
  async store(userId, sessionId, role, content, metadata = {}) {
    // Same embedding model as the read path (retrieve) — vectors are only
    // comparable within a single embedding space.
    const response = await openai.embeddings.create({
      model: 'text-embedding-3-small',
      input: content,
    });
    const embedding = response.data[0].embedding;

    await this.index.upsert([{
      // BUG FIX: the previous id, `${userId}-${Date.now()}`, collides when
      // two messages are stored within the same millisecond (exactly what
      // chat() does with the user/assistant pair), and upsert then silently
      // overwrites the first record. A UUID guarantees uniqueness.
      id: `${userId}-${randomUUID()}`,
      values: embedding,
      metadata: {
        userId,
        sessionId,
        role,
        content,
        timestamp: new Date().toISOString(),
        ...metadata,
      },
    }]);
  }

  /**
   * Store a batch of messages sequentially (preserves timestamp ordering).
   * @param {Array<{role: string, content: string}>} messages
   */
  async storeConversation(userId, sessionId, messages) {
    for (const msg of messages) {
      await this.store(userId, sessionId, msg.role, msg.content);
    }
  }
}
class MemoryStore {
// ... previous code ...

/**
 * Find up to `limit` past messages most semantically similar to `query`,
 * restricted to one user's memories.
 * @param {string} userId - scopes the search via metadata filter
 * @param {string} query - current user message to match against
 * @param {number} [limit=5] - max results
 * @returns {Promise<Array<{role, content, timestamp, relevance}>>}
 */
async retrieve(userId, query, limit = 5) {
// Embed the query with the same model used at write time — vectors are
// only comparable within one embedding space.
const response = await openai.embeddings.create({
model: 'text-embedding-3-small',
input: query,
});
const queryEmbedding = response.data[0].embedding;
// Metadata filter keeps results scoped to this user only.
const results = await this.index.query({
vector: queryEmbedding,
topK: limit,
filter: { userId: { $eq: userId } },
includeMetadata: true,
});
// Flatten index matches into chat-style snippets; `score` is the
// similarity reported by the index (cosine, per the index config).
return results.matches.map(m => ({
role: m.metadata.role,
content: m.metadata.content,
timestamp: m.metadata.timestamp,
relevance: m.score,
}));
}

/**
 * Fetch up to `limit` messages from the current session, sorted oldest
 * to newest, regardless of semantic relevance.
 *
 * NOTE(review): this queries with an all-zero vector purely to exercise
 * the metadata filter. Cosine similarity against a zero vector is
 * mathematically undefined — some backends error, others return matches
 * in arbitrary order. When the session holds more than `limit` messages,
 * the subset returned is effectively arbitrary, NOT the most recent.
 * Confirm backend behavior, or keep session history in a conventional
 * store (Redis/SQL) instead.
 */
async getRecentFromSession(userId, sessionId, limit = 10) {
// For current session, also get recent messages regardless of relevance
const results = await this.index.query({
vector: new Array(1536).fill(0), // Dummy vector
topK: limit,
filter: {
userId: { $eq: userId },
sessionId: { $eq: sessionId },
},
includeMetadata: true,
});
// Chronological order so the LLM sees the conversation as it happened.
return results.matches
.sort((a, b) => new Date(a.metadata.timestamp) - new Date(b.metadata.timestamp))
.map(m => ({
role: m.metadata.role,
content: m.metadata.content,
}));
}
}
/**
 * Chat engine that augments each LLM call with long-term memories and
 * recent session history, then persists the new exchange.
 */
class ChatWithMemory {
  /** @param {MemoryStore} memoryStore - read/write memory layer. */
  constructor(memoryStore) {
    this.memory = memoryStore;
    this.systemPrompt = `You are a helpful assistant with access to conversation history.
Use the provided memory context to give personalized, consistent responses.
If you remember something relevant from past conversations, reference it naturally.
If you're unsure about past context, ask for clarification.`;
  }

  /**
   * Answer one user message.
   * Context layout: system prompt, relevant memories, recent session turns,
   * then the new message. Both sides of the exchange are stored afterwards.
   * @returns {Promise<string>} the assistant's reply text.
   */
  async chat(userId, sessionId, userMessage) {
    const memories = await this.memory.retrieve(userId, userMessage, 5);
    const sessionHistory = await this.memory.getRecentFromSession(
      userId, sessionId, 6
    );

    const messages = [
      { role: 'system', content: this.systemPrompt },
      { role: 'system', content: `Relevant memory from past conversations:\n${this.formatMemories(memories)}` },
      ...sessionHistory,
      { role: 'user', content: userMessage },
    ];

    const completion = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages,
      temperature: 0.7,
    });
    const reply = completion.choices[0].message.content;

    // Persist user message first so timestamps reflect conversation order.
    await this.memory.store(userId, sessionId, 'user', userMessage);
    await this.memory.store(userId, sessionId, 'assistant', reply);
    return reply;
  }

  /** Render retrieved memories as timestamped transcript lines for the prompt. */
  formatMemories(memories) {
    if (memories.length === 0) return 'No relevant past conversations found.';
    const lines = [];
    for (const m of memories) {
      lines.push(`[${m.timestamp}] ${m.role}: ${m.content}`);
    }
    return lines.join('\n');
  }
}
To keep long-term memory compact, condense completed sessions into summaries instead of storing every raw message:
/**
 * Condense a full session transcript into a short summary suitable for
 * long-term storage; uses the cheaper gpt-4o-mini model.
 * @param {Array<{role: string, content: string}>} sessionMessages
 * @returns {Promise<string>} summary text.
 */
async function summarizeSession(sessionMessages) {
  const transcriptLines = [];
  for (const msg of sessionMessages) {
    transcriptLines.push(`${msg.role}: ${msg.content}`);
  }
  const transcript = transcriptLines.join('\n');

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [{
      role: 'user',
      content: `Summarize this conversation, highlighting:
- Key topics discussed
- User preferences mentioned
- Important decisions or conclusions
- Any commitments or follow-ups
Conversation:
${transcript}`,
    }],
  });
  return response.choices[0].message.content;
}
// Replace an old session's full transcript with a single summary memory —
// cheaper to store and less noisy to retrieve than every raw message.
async function archiveSession(userId, sessionId, messages) {
  const summary = await summarizeSession(messages);
  const archiveTags = {
    type: 'session-summary',
    messageCount: messages.length, // how many raw messages this replaces
  };
  await memoryStore.store(userId, sessionId, 'summary', summary, archiveTags);
}
Categorize memories for better retrieval:
// Memory categories used to tag stored messages for filtered retrieval.
// Frozen so an accidental assignment elsewhere fails loudly (in strict
// mode) instead of silently inventing a new category.
const MEMORY_TYPES = Object.freeze({
  PREFERENCE: 'preference', // user likes/dislikes
  FACT: 'fact', // information about the user
  DECISION: 'decision', // choices made
  CONVERSATION: 'conversation', // general chat
});
/**
 * Classify a user message into a memory category (via a cheap model call)
 * and store it with that tag so retrieval can filter by type.
 * @param {string} userId
 * @param {string} sessionId
 * @param {string} message - raw user message text
 */
async function categorizeAndStore(userId, sessionId, message) {
  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [{
      role: 'user',
      content: `Categorize this message: "${message}"
Categories: preference, fact, decision, conversation
Return only the category name.`,
    }],
  });
  const label = response.choices[0].message.content.trim().toLowerCase();
  // BUG FIX: the model is not guaranteed to obey "return only the category
  // name" — previously any stray reply ("i'd say preference", etc.) became
  // filterable metadata. Accept only known categories; fall back to the
  // generic one otherwise.
  const type = Object.values(MEMORY_TYPES).includes(label)
    ? label
    : MEMORY_TYPES.CONVERSATION;
  await memoryStore.store(userId, sessionId, 'user', message, {
    memoryType: type,
  });
}
Some information—like a user's core profile—should always be in the prompt, deterministically, rather than left to probabilistic vector retrieval:
/**
 * Always-available user facts (name, preferences, etc.) kept in a Redis
 * hash, separate from vector memory — injected into every prompt rather
 * than retrieved by similarity.
 */
class UserProfile {
  /** @param {string} userId - key suffix for the Redis hash `profile:<userId>`. */
  constructor(userId) {
    this.userId = userId;
    this.profile = {};
  }

  /**
   * Set one profile field in memory and persist it to Redis.
   * NOTE(review): assumes a module-level `redis` client exists — confirm.
   */
  async update(key, value) {
    this.profile[key] = value;
    await redis.hset(`profile:${this.userId}`, key, JSON.stringify(value));
  }

  /** Load the full profile hash from Redis into `this.profile`. */
  async load() {
    const data = await redis.hgetall(`profile:${this.userId}`);
    this.profile = Object.fromEntries(
      Object.entries(data).map(([k, v]) => [k, JSON.parse(v)])
    );
    return this.profile;
  }

  /**
   * Render the profile as prompt-ready bullet lines.
   * BUG FIX: non-string values (objects, arrays) previously interpolated
   * as '[object Object]'; serialize them so nested values stay readable
   * in the prompt. Plain strings are kept unquoted, as before.
   * @returns {string} newline-joined `- key: value` lines.
   */
  toContext() {
    return Object.entries(this.profile)
      .map(([k, v]) => `- ${k}: ${typeof v === 'string' ? v : JSON.stringify(v)}`)
      .join('\n');
  }
}
// In chat engine
// Illustrative fragment of ChatWithMemory.chat showing where the user
// profile is injected; the rest of the context (memory, session history,
// the new user message) is elided — this snippet is not runnable on its own.
async chat(userId, sessionId, userMessage) {
// Profile is reloaded on every turn so updates take effect immediately
// (at the cost of one Redis round-trip per message).
const userProfile = new UserProfile(userId);
await userProfile.load();
const messages = [
{ role: 'system', content: this.systemPrompt },
{ role: 'system', content: `User profile:\n${userProfile.toContext()}` },
// ... rest of context
];
}
// Erase every stored memory for one user (GDPR "right to be forgotten").
// Deletion by metadata filter is backend-specific — the exact call varies
// by vector database.
async function forgetUser(userId) {
  const filter = { userId: { $eq: userId } };
  await index.deleteMany({ filter });
}
/**
 * Delete a user's memories older than `maxAge` days, keeping anything
 * tagged as a preference (those stay useful indefinitely).
 * @param {string} userId
 * @param {number} [maxAge=90] - age threshold in days
 */
async function pruneMemories(userId, maxAge = 90) {
  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - maxAge);
  const filter = {
    userId: { $eq: userId },
    timestamp: { $lt: cutoff.toISOString() },
    memoryType: { $ne: 'preference' }, // Keep preferences
  };
  await index.deleteMany({ filter });
}
Prevent unbounded growth:
// Cap a user's raw memory count; overflow is summarized into one archive
// record rather than silently dropped.
async function enforceMemoryLimit(userId, maxMemories = 1000) {
// NOTE(review): filtered describeIndexStats is not supported on every
// Pinecone index type — confirm it works for your plan/index.
const stats = await index.describeIndexStats({
filter: { userId: { $eq: userId } },
});
if (stats.totalVectorCount > maxMemories) {
// Compress the 100 oldest memories into a single summary record.
// getOldestMemories / summarizeMemories / deleteMemories / storeMemory
// are presumably defined elsewhere in the project — not shown here.
const oldest = await getOldestMemories(userId, 100);
const summary = await summarizeMemories(oldest);
await deleteMemories(oldest.map(m => m.id));
await storeMemory(userId, 'archive', 'summary', summary, {
type: 'archive',
summarizedCount: oldest.length,
});
}
}
const express = require('express');
const app = express();
app.use(express.json());

// BUG FIX: `await` is illegal at the top level of a CommonJS module (this
// file uses require()), so `new MemoryStore(await initializeMemoryIndex())`
// would not even parse. Initialize asynchronously and only start listening
// once the vector index is ready.
let memoryStore;
let chatEngine;

// POST /chat — one conversational turn. Body: { userId, sessionId?, message }.
// A sessionId is minted when the client doesn't supply one and echoed back.
app.post('/chat', async (req, res) => {
  const { userId, sessionId, message } = req.body;
  if (!userId || !message) {
    return res.status(400).json({ error: 'userId and message required' });
  }
  const actualSessionId = sessionId || `session-${Date.now()}`;
  try {
    const response = await chatEngine.chat(userId, actualSessionId, message);
    res.json({
      response,
      sessionId: actualSessionId,
    });
  } catch (error) {
    console.error('Chat error:', error);
    res.status(500).json({ error: 'Failed to process message' });
  }
});

// DELETE /memory/:userId — GDPR erasure endpoint.
// BUG FIX: previously had no error handling, so a failed delete produced an
// unhandled rejection and a hung request.
app.delete('/memory/:userId', async (req, res) => {
  try {
    await forgetUser(req.params.userId);
    res.json({ success: true });
  } catch (error) {
    console.error('Forget error:', error);
    res.status(500).json({ error: 'Failed to delete memories' });
  }
});

// Bootstrap: bind the port only after the memory index is usable, so no
// request can arrive before chatEngine exists.
initializeMemoryIndex()
  .then((index) => {
    memoryStore = new MemoryStore(index);
    chatEngine = new ChatWithMemory(memoryStore);
    app.listen(3000);
  })
  .catch((error) => {
    console.error('Startup failed:', error);
    process.exit(1);
  });
Verify your memory system works:
/**
 * Smoke test: seed personal facts in one session, then ask for recall from
 * a brand-new session. The printed reply should mention being a software
 * engineer — proof that cross-session memory retrieval works.
 */
async function testMemory() {
  const userId = 'test-user';
  const seedSession = 'test-session';

  // Seed two facts about the user in the first session.
  await chatEngine.chat(userId, seedSession, 'My name is Alex and I prefer dark mode.');
  await chatEngine.chat(userId, seedSession, 'I work as a software engineer.');

  // Fresh session — only vector memory can supply the answer.
  const recallSession = 'test-session-2';
  const answer = await chatEngine.chat(
    userId,
    recallSession,
    'Do you remember what I do for work?'
  );
  console.log(answer);
  // Should reference being a software engineer
}
A chatbot with vector-based memory feels remarkably human. It remembers your preferences, references past conversations naturally, and builds rapport over time. Users notice the difference immediately—and they keep coming back.
Spread the word about this post