January 21, 2026
7 min read
Traditional search matches keywords. Type "dog" and you find documents containing "dog." But what if someone searches for "pet that barks"? Keyword search fails because the words don't match, even though the intent is clear.
Embedding-based search solves this. It understands meaning, not just words. This guide shows you how to build it.
Embeddings are numerical representations of text (or images, audio, etc.) that capture semantic meaning. Similar concepts end up close together in this numerical space.
"happy" → [0.2, 0.8, 0.1, ...]
"joyful" → [0.21, 0.79, 0.12, ...] // Close to "happy"
"sad" → [0.9, 0.1, 0.3, ...] // Far from "happy"
When you embed a search query and compare it to embedded documents, semantically similar content scores high—even without matching keywords.
A typical embedding search system looks like this:
Indexing Pipeline:
[Documents] → [Embedding Model] → [Vectors] → [Vector Database]
Search Pipeline:
[Query] → [Embedding Model] → [Query Vector] → [Vector Search] → [Results]
Let's build each piece.
OpenAI's embedding models are popular and effective:
const OpenAI = require('openai');
// One shared client for all embedding calls; the API key comes from the environment.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
/**
 * Embed a single piece of text with OpenAI's small embedding model.
 *
 * @param {string} text - Raw text to embed.
 * @returns {Promise<number[]>} The embedding vector (1536 dims for this model).
 */
async function getEmbedding(text) {
  const { data } = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: text,
  });
  const [first] = data;
  return first.embedding;
}
// Example (top-level await requires an ES module or an async context)
const embedding = await getEmbedding('How do I train a puppy?');
console.log(embedding.length); // 1536 dimensions for text-embedding-3-small
OpenAI text-embedding-3-small: 1536 dimensions; inexpensive and a solid default for most search workloads.
OpenAI text-embedding-3-large: 3072 dimensions; higher retrieval quality at higher cost and storage.
Open Source (Sentence Transformers):
# Local, free alternative: Sentence Transformers runs on your own hardware (no API calls).
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')  # small, fast general-purpose model
embedding = model.encode('How do I train a puppy?')  # returns the embedding vector
Embedding models have token limits (8191 for OpenAI). Split long documents:
/**
 * Split text into overlapping word-based chunks.
 *
 * @param {string} text - Source text; words are assumed space-separated.
 * @param {number} [maxChunkSize=500] - Maximum words per chunk.
 * @param {number} [overlap=50] - Words shared between consecutive chunks.
 * @returns {string[]} Non-empty chunks, in document order.
 * @throws {RangeError} If overlap >= maxChunkSize, which would make the loop
 *   step zero or negative and previously spun forever.
 */
function chunkText(text, maxChunkSize = 500, overlap = 50) {
  if (maxChunkSize <= overlap) {
    throw new RangeError('maxChunkSize must be greater than overlap');
  }
  const words = text.split(' ');
  const chunks = [];
  // Step by (size - overlap) so each chunk repeats the tail of the previous
  // one, preserving context across chunk boundaries.
  for (let i = 0; i < words.length; i += maxChunkSize - overlap) {
    const chunk = words.slice(i, i + maxChunkSize).join(' ');
    if (chunk.trim()) {
      chunks.push(chunk);
    }
  }
  return chunks;
}
/**
 * Embed a document chunk-by-chunk.
 * All chunks are embedded in parallel; returns one record per chunk of the
 * form { documentId, chunk, embedding }.
 */
async function embedDocument(document) {
  const pieces = chunkText(document.content);
  const vectors = await Promise.all(pieces.map((piece) => getEmbedding(piece)));
  const records = [];
  vectors.forEach((vector, idx) => {
    records.push({
      documentId: document.id,
      chunk: pieces[idx],
      embedding: vector,
    });
  });
  return records;
}
You need a database optimized for vector similarity search.
const { Pinecone } = require('@pinecone-database/pinecone');
// One shared client; the API key comes from the environment.
const pinecone = new Pinecone({ apiKey: process.env.PINECONE_API_KEY });
// Handle to a pre-created index.
// NOTE(review): the index dimension must match the embedding model (1536 here) — confirm.
const index = pinecone.index('my-search-index');
/**
 * Embed every document's chunks and upsert the resulting vectors.
 *
 * Documents are embedded sequentially (rate-limit friendly), and the upsert
 * is split into batches because Pinecone caps the size of a single upsert
 * request; the original one-shot upsert fails for large corpora.
 *
 * @param {Array<{id: string|number, title: string, content: string}>} documents
 * @param {number} [batchSize=100] - Vectors per upsert request.
 */
async function indexDocuments(documents, batchSize = 100) {
  const vectors = [];
  for (const doc of documents) {
    const chunks = await embedDocument(doc);
    chunks.forEach((chunk, i) => {
      vectors.push({
        id: `${doc.id}-${i}`, // one vector id per chunk
        values: chunk.embedding,
        metadata: {
          documentId: doc.id,
          title: doc.title,
          chunk: chunk.chunk, // stored so search results can show an excerpt
        },
      });
    });
  }
  // Upsert in batches to stay under Pinecone's per-request limits.
  for (let i = 0; i < vectors.length; i += batchSize) {
    await index.upsert(vectors.slice(i, i + batchSize));
  }
}
/**
 * Semantic search: embed the query and fetch its nearest chunks from Pinecone.
 *
 * @param {string} query - User's search text.
 * @param {number} [topK=10] - Number of matches to return.
 * @returns {Promise<Array>} Pinecone match objects (with metadata attached).
 */
async function search(query, topK = 10) {
  const vector = await getEmbedding(query);
  const response = await index.query({
    vector,
    topK,
    includeMetadata: true,
  });
  return response.matches;
}
-- Enable the extension
CREATE EXTENSION IF NOT EXISTS vector;
-- Create table with vector column
-- (vector(1536) matches the output dimension of text-embedding-3-small)
CREATE TABLE documents (
id SERIAL PRIMARY KEY,
title TEXT,
content TEXT,
embedding vector(1536)
);
-- Create index for fast search
-- ivfflat is an approximate index; per the pgvector docs, build it AFTER
-- bulk-loading data so the `lists` clustering reflects the real distribution.
CREATE INDEX ON documents
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
const { createClient } = require('@supabase/supabase-js');
// Shared Supabase client; URL and key come from the environment.
const supabase = createClient(
  process.env.SUPABASE_URL,
  process.env.SUPABASE_KEY
);
/**
 * Semantic search backed by a Postgres function (`search_documents`) that
 * ranks rows by vector similarity via pgvector.
 *
 * @param {string} query - User's search text.
 * @param {number} [limit=10] - Maximum rows to return.
 * @returns {Promise<Array>} Matching rows from the RPC.
 * @throws {Error} If the RPC reports an error. (Previously the error was
 *   silently discarded and null data was returned.)
 */
async function search(query, limit = 10) {
  const queryEmbedding = await getEmbedding(query);
  const { data, error } = await supabase.rpc('search_documents', {
    query_embedding: queryEmbedding,
    match_count: limit,
  });
  if (error) {
    throw new Error(`search_documents RPC failed: ${error.message}`, { cause: error });
  }
  return data;
}
const weaviate = require('weaviate-ts-client');
// Hosted Weaviate instance over HTTPS; replace `host` with your cluster URL.
const client = weaviate.client({
  scheme: 'https',
  host: 'your-instance.weaviate.network',
});
/**
 * Search Weaviate's `Document` class with nearText — Weaviate embeds the
 * query itself, so no separate embedding call is needed here.
 *
 * @param {string} query - User's search text.
 * @param {number} [limit=10] - Maximum documents to return.
 * @returns {Promise<Array>} Matching documents with a certainty score.
 */
async function search(query, limit = 10) {
  const request = client.graphql
    .get()
    .withClassName('Document')
    .withFields('title content _additional { certainty }')
    .withNearText({ concepts: [query] })
    .withLimit(limit);
  const response = await request.do();
  return response.data.Get.Document;
}
const express = require('express');
const app = express();

/**
 * GET /api/search?q=...&limit=...
 * Embeds the query, runs a vector search, dedupes chunk hits per document,
 * and responds with { results: [{ id, title, excerpt, score }] }.
 */
app.get('/api/search', async (req, res) => {
  const { q, limit = 10 } = req.query;
  if (!q || q.length < 2) {
    return res.status(400).json({ error: 'Query too short' });
  }
  // Query-string values are strings: parse with an explicit radix and reject
  // values that would break the vector search (NaN, zero, negatives).
  const topK = Number.parseInt(limit, 10);
  if (!Number.isInteger(topK) || topK < 1) {
    return res.status(400).json({ error: 'Invalid limit' });
  }
  try {
    // Get query embedding
    const queryEmbedding = await getEmbedding(q);
    // Search vector database
    const results = await vectorDB.search(queryEmbedding, topK);
    // Deduplicate by document ID (if chunked): keep only the best-ranked
    // chunk per document.
    const seen = new Set();
    const deduped = results.filter((r) => {
      if (seen.has(r.metadata.documentId)) return false;
      seen.add(r.metadata.documentId);
      return true;
    });
    // Format response: short excerpt plus the similarity score.
    const formatted = deduped.map((r) => ({
      id: r.metadata.documentId,
      title: r.metadata.title,
      excerpt: r.metadata.chunk.substring(0, 200) + '...',
      score: r.score,
    }));
    res.json({ results: formatted });
  } catch (error) {
    // Log server-side; never leak internals to the client.
    console.error('Search error:', error);
    res.status(500).json({ error: 'Search failed' });
  }
});
Pure semantic search can miss exact matches. Combine with keyword search:
/**
 * Blend semantic and keyword retrieval into one ranked list.
 * Each source contributes a rank-based score in [0, 1]; the final score is a
 * weighted sum (70% semantic, 30% keyword).
 *
 * @param {string} query - User's search text.
 * @param {number} [limit=10] - Number of merged results to return.
 * @returns {Promise<Array<{id: *, score: number}>>} Ranked ids with scores.
 */
async function hybridSearch(query, limit = 10) {
  // Over-fetch from both sources so the merged list has enough candidates.
  const [semanticResults, keywordResults] = await Promise.all([
    semanticSearch(query, limit * 2),
    keywordSearch(query, limit * 2), // Traditional full-text search
  ]);

  // Rank position → normalized score: first hit gets ~1, last gets ~0.
  const rankScore = (position, total) => 1 - position / total;

  const scoreMap = new Map();
  for (const [pos, hit] of semanticResults.entries()) {
    scoreMap.set(hit.id, { semantic: rankScore(pos, semanticResults.length), keyword: 0 });
  }
  for (const [pos, hit] of keywordResults.entries()) {
    const entry = scoreMap.get(hit.id) ?? { semantic: 0, keyword: 0 };
    entry.keyword = rankScore(pos, keywordResults.length);
    scoreMap.set(hit.id, entry);
  }

  // Weighted blend — tune the 0.7 / 0.3 split for your corpus.
  return [...scoreMap.entries()]
    .map(([id, { semantic, keyword }]) => ({ id, score: semantic * 0.7 + keyword * 0.3 }))
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}
Query embeddings are expensive. Cache them:
const LRU = require('lru-cache');
// `createHash` is used below to derive cache keys but was never imported.
const { createHash } = require('crypto');

// Bounded in-memory cache: at most 1000 entries, each expiring after an hour,
// so repeated queries skip the embedding API entirely.
// NOTE(review): on lru-cache >= 7 the class is a named export —
// `const { LRUCache } = require('lru-cache')` — confirm the installed version.
const embeddingCache = new LRU({
  max: 1000,
  ttl: 1000 * 60 * 60, // 1 hour
});
/**
 * Memoized wrapper around getEmbedding.
 * Keys the cache on an MD5 digest so arbitrarily long inputs map to short,
 * fixed-size keys.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} Cached or freshly computed embedding.
 */
async function getCachedEmbedding(text) {
  const key = createHash('md5').update(text).digest('hex');
  if (embeddingCache.has(key)) {
    return embeddingCache.get(key);
  }
  const fresh = await getEmbedding(text);
  embeddingCache.set(key, fresh);
  return fresh;
}
Vector databases support metadata filtering:
/**
 * Vector search restricted by optional metadata criteria.
 *
 * Only the filters actually provided are sent to the index; the original
 * always sent both keys, so a missing `category`/`startDate` produced a
 * filter with undefined values that matched nothing.
 *
 * @param {string} query - User's search text.
 * @param {{category?: string, startDate?: *}} [filters={}] - Optional criteria.
 * @param {number} [topK=10] - Number of matches to return.
 * @returns {Promise<Array>} Matching vectors from the index.
 */
async function searchWithFilters(query, filters = {}, topK = 10) {
  const queryEmbedding = await getEmbedding(query);
  // Build the metadata filter only from the criteria actually supplied.
  const filter = {};
  if (filters.category !== undefined) {
    filter.category = { $eq: filters.category };
  }
  if (filters.startDate !== undefined) {
    filter.date = { $gte: filters.startDate };
  }
  const results = await index.query({
    vector: queryEmbedding,
    topK,
    // Omit `filter` entirely when no criteria were given.
    ...(Object.keys(filter).length > 0 ? { filter } : {}),
  });
  return results.matches;
}
When indexing many documents:
/**
 * Embed many texts, sending one API request per batch of `batchSize` —
 * batching keeps request payloads small and cuts round trips.
 *
 * @param {string[]} texts - Texts to embed.
 * @param {number} [batchSize=100] - Inputs per API request.
 * @returns {Promise<number[][]>} Embeddings, aligned with `texts`.
 */
async function batchEmbed(texts, batchSize = 100) {
  const results = [];
  for (let start = 0; start < texts.length; start += batchSize) {
    const { data } = await openai.embeddings.create({
      model: 'text-embedding-3-small',
      input: texts.slice(start, start + batchSize),
    });
    for (const item of data) {
      results.push(item.embedding);
    }
  }
  return results;
}
Not chunking properly: Long documents with single embeddings dilute meaning. Chunk strategically.
Ignoring exact matches: Users searching for "SKU-12345" expect exact matches. Use hybrid search.
Outdated index: Keep embeddings synchronized with content changes.
Wrong similarity metric: Cosine similarity works for most cases. Know when to use others.
Track these metrics: click-through rate on results, the share of queries returning zero results, and end-to-end search latency.
Embedding search feels like magic when it works—users find what they mean, not just what they type. Start with a managed vector database, validate with real queries, and iterate based on user behavior. The technology is mature enough for production use, and the user experience improvement is substantial.
Spread the word about this post