Calculates the cosine similarity between two embedding vectors. Returns a FLOAT64 value between -1 and 1, where 1 indicates vectors pointing in the same direction (maximally similar), 0 indicates orthogonal (unrelated) vectors, and -1 indicates vectors pointing in opposite directions. Commonly used to find semantically similar content.
Use Cases
Semantic Search: Rank documents by relevance to a query
Quality Scoring: Measure similarity to ideal examples
Syntax
-- Cosine similarity of two embedding vectors, returned as FLOAT64 in [-1, 1].
-- Both arguments must have the same dimensionality.
AI.SIMILARITY(embedding_1, embedding_2)
-- Cosine similarity of two embedding vectors, returned as FLOAT64 in [-1, 1].
-- Both arguments must have the same dimensionality.
AI.SIMILARITY(embedding_1, embedding_2)
-- Cosine similarity of two embedding vectors, returned as FLOAT64 in [-1, 1].
-- Both arguments must have the same dimensionality.
AI.SIMILARITY(embedding_1, embedding_2)
Parameters
embedding_1: ARRAY<FLOAT64> - First embedding vector
embedding_2: ARRAY<FLOAT64> - Second embedding vector
Both embeddings must have the same dimensionality.
Code Examples
Example 1: Find Similar Products
-- Rank products by semantic similarity to a free-text search query.

-- Get query embedding: embed the search text once and keep it in a script variable.
DECLARE query_embedding ARRAY<FLOAT64>;

-- NOTE(review): assumes AI.EMBED yields a value assignable to ARRAY<FLOAT64>;
-- confirm against the function's documented return type.
SET query_embedding = (
    SELECT
        AI.EMBED(
            model => 'text-embedding-004',
            content => 'noise cancelling wireless headphones',
            task_type => 'RETRIEVAL_QUERY',
            connection_id => 'us.my_vertex_connection'
        )
);

-- Find top 10 similar products: score every product against the query embedding.
SELECT
    product_id,
    product_name,
    AI.SIMILARITY(
        query_embedding,
        product_embedding
    ) AS similarity_score
FROM products_with_embeddings
ORDER BY similarity_score DESC
LIMIT 10;
-- Rank products by semantic similarity to a free-text search query.

-- Get query embedding: embed the search text once and keep it in a script variable.
DECLARE query_embedding ARRAY<FLOAT64>;

-- NOTE(review): assumes AI.EMBED yields a value assignable to ARRAY<FLOAT64>;
-- confirm against the function's documented return type.
SET query_embedding = (
    SELECT
        AI.EMBED(
            model => 'text-embedding-004',
            content => 'noise cancelling wireless headphones',
            task_type => 'RETRIEVAL_QUERY',
            connection_id => 'us.my_vertex_connection'
        )
);

-- Find top 10 similar products: score every product against the query embedding.
SELECT
    product_id,
    product_name,
    AI.SIMILARITY(
        query_embedding,
        product_embedding
    ) AS similarity_score
FROM products_with_embeddings
ORDER BY similarity_score DESC
LIMIT 10;
-- Rank products by semantic similarity to a free-text search query.

-- Get query embedding: embed the search text once and keep it in a script variable.
DECLARE query_embedding ARRAY<FLOAT64>;

-- NOTE(review): assumes AI.EMBED yields a value assignable to ARRAY<FLOAT64>;
-- confirm against the function's documented return type.
SET query_embedding = (
    SELECT
        AI.EMBED(
            model => 'text-embedding-004',
            content => 'noise cancelling wireless headphones',
            task_type => 'RETRIEVAL_QUERY',
            connection_id => 'us.my_vertex_connection'
        )
);

-- Find top 10 similar products: score every product against the query embedding.
SELECT
    product_id,
    product_name,
    AI.SIMILARITY(
        query_embedding,
        product_embedding
    ) AS similarity_score
FROM products_with_embeddings
ORDER BY similarity_score DESC
LIMIT 10;
Example 2: Compare Two Documents
-- List every pair of distinct documents whose embeddings are highly similar,
-- most similar pairs first.
SELECT
    doc1.document_id AS doc1_id,
    doc2.document_id AS doc2_id,
    AI.SIMILARITY(
        doc1.embedding,
        doc2.embedding
    ) AS similarity
FROM document_embeddings AS doc1
CROSS JOIN document_embeddings AS doc2
WHERE
    -- Avoid duplicate pairs: keeps each unordered pair once and drops self-pairs.
    doc1.document_id < doc2.document_id
    -- Only high similarity. The expression is repeated here because the
    -- SELECT alias `similarity` is not visible in WHERE.
    AND AI.SIMILARITY(doc1.embedding, doc2.embedding) > 0.8
ORDER BY similarity DESC;
-- List every pair of distinct documents whose embeddings are highly similar,
-- most similar pairs first.
SELECT
    doc1.document_id AS doc1_id,
    doc2.document_id AS doc2_id,
    AI.SIMILARITY(
        doc1.embedding,
        doc2.embedding
    ) AS similarity
FROM document_embeddings AS doc1
CROSS JOIN document_embeddings AS doc2
WHERE
    -- Avoid duplicate pairs: keeps each unordered pair once and drops self-pairs.
    doc1.document_id < doc2.document_id
    -- Only high similarity. The expression is repeated here because the
    -- SELECT alias `similarity` is not visible in WHERE.
    AND AI.SIMILARITY(doc1.embedding, doc2.embedding) > 0.8
ORDER BY similarity DESC;
-- List every pair of distinct documents whose embeddings are highly similar,
-- most similar pairs first.
SELECT
    doc1.document_id AS doc1_id,
    doc2.document_id AS doc2_id,
    AI.SIMILARITY(
        doc1.embedding,
        doc2.embedding
    ) AS similarity
FROM document_embeddings AS doc1
CROSS JOIN document_embeddings AS doc2
WHERE
    -- Avoid duplicate pairs: keeps each unordered pair once and drops self-pairs.
    doc1.document_id < doc2.document_id
    -- Only high similarity. The expression is repeated here because the
    -- SELECT alias `similarity` is not visible in WHERE.
    AND AI.SIMILARITY(doc1.embedding, doc2.embedding) > 0.8
ORDER BY similarity DESC;
Example 3: Find Near-Duplicates
-- Detect near-duplicate articles by comparing every pair of article embeddings.

-- pairs: one row per unordered pair of distinct articles with its similarity.
WITH pairs AS (
    SELECT
        a.article_id AS id1,
        b.article_id AS id2,
        AI.SIMILARITY(a.embedding, b.embedding) AS similarity
    FROM article_embeddings AS a
    CROSS JOIN article_embeddings AS b
    -- The strict inequality keeps each pair once and excludes self-pairs.
    WHERE a.article_id < b.article_id
)

SELECT
    id1,
    id2,
    similarity
FROM pairs
WHERE similarity > 0.95  -- Very high similarity threshold for duplicates
ORDER BY similarity DESC;
-- Detect near-duplicate articles by comparing every pair of article embeddings.

-- pairs: one row per unordered pair of distinct articles with its similarity.
WITH pairs AS (
    SELECT
        a.article_id AS id1,
        b.article_id AS id2,
        AI.SIMILARITY(a.embedding, b.embedding) AS similarity
    FROM article_embeddings AS a
    CROSS JOIN article_embeddings AS b
    -- The strict inequality keeps each pair once and excludes self-pairs.
    WHERE a.article_id < b.article_id
)

SELECT
    id1,
    id2,
    similarity
FROM pairs
WHERE similarity > 0.95  -- Very high similarity threshold for duplicates
ORDER BY similarity DESC;
-- Detect near-duplicate articles by comparing every pair of article embeddings.

-- pairs: one row per unordered pair of distinct articles with its similarity.
WITH pairs AS (
    SELECT
        a.article_id AS id1,
        b.article_id AS id2,
        AI.SIMILARITY(a.embedding, b.embedding) AS similarity
    FROM article_embeddings AS a
    CROSS JOIN article_embeddings AS b
    -- The strict inequality keeps each pair once and excludes self-pairs.
    WHERE a.article_id < b.article_id
)

SELECT
    id1,
    id2,
    similarity
FROM pairs
WHERE similarity > 0.95  -- Very high similarity threshold for duplicates
ORDER BY similarity DESC;
Example 4: Recommendation System
-- Given a user's favorite product, find similar items.
DECLARE favorite_product_embedding ARRAY<FLOAT64>;

-- Load the favorite product's embedding into a script variable.
-- NOTE(review): the scalar subquery presumes product_id is unique in
-- products_with_embeddings — confirm against the table definition.
SET favorite_product_embedding = (
    SELECT product_embedding
    FROM products_with_embeddings
    WHERE product_id = 'PROD-12345'
);

-- Return the 5 products most similar to the favorite within the chosen categories.
SELECT
    product_id,
    product_name,
    price,
    category,
    AI.SIMILARITY(
        favorite_product_embedding,
        product_embedding
    ) AS similarity
FROM products_with_embeddings
WHERE
    product_id != 'PROD-12345'  -- Exclude the original product
    AND category IN ('electronics', 'accessories')  -- Filter by category
ORDER BY similarity DESC
LIMIT 5;
-- Given a user's favorite product, find similar items.
DECLARE favorite_product_embedding ARRAY<FLOAT64>;

-- Load the favorite product's embedding into a script variable.
-- NOTE(review): the scalar subquery presumes product_id is unique in
-- products_with_embeddings — confirm against the table definition.
SET favorite_product_embedding = (
    SELECT product_embedding
    FROM products_with_embeddings
    WHERE product_id = 'PROD-12345'
);

-- Return the 5 products most similar to the favorite within the chosen categories.
SELECT
    product_id,
    product_name,
    price,
    category,
    AI.SIMILARITY(
        favorite_product_embedding,
        product_embedding
    ) AS similarity
FROM products_with_embeddings
WHERE
    product_id != 'PROD-12345'  -- Exclude the original product
    AND category IN ('electronics', 'accessories')  -- Filter by category
ORDER BY similarity DESC
LIMIT 5;
-- Given a user's favorite product, find similar items.
DECLARE favorite_product_embedding ARRAY<FLOAT64>;

-- Load the favorite product's embedding into a script variable.
-- NOTE(review): the scalar subquery presumes product_id is unique in
-- products_with_embeddings — confirm against the table definition.
SET favorite_product_embedding = (
    SELECT product_embedding
    FROM products_with_embeddings
    WHERE product_id = 'PROD-12345'
);

-- Return the 5 products most similar to the favorite within the chosen categories.
SELECT
    product_id,
    product_name,
    price,
    category,
    AI.SIMILARITY(
        favorite_product_embedding,
        product_embedding
    ) AS similarity
FROM products_with_embeddings
WHERE
    product_id != 'PROD-12345'  -- Exclude the original product
    AND category IN ('electronics', 'accessories')  -- Filter by category
ORDER BY similarity DESC
LIMIT 5;
Example 5: Content Quality Scoring
-- Compare content against high-quality reference examples.
DECLARE reference_embedding ARRAY<FLOAT64>;

-- Average of multiple good examples. AVG() does not take an ARRAY argument,
-- so the mean is computed element-wise: each embedding is unnested together
-- with its element index, values are averaged per index, and the averages are
-- reassembled into a single array in index order.
SET reference_embedding = (
    SELECT ARRAY_AGG(per_index.mean_value ORDER BY per_index.idx)
    FROM (
        SELECT
            idx,
            AVG(component) AS mean_value
        FROM article_embeddings AS ae
        CROSS JOIN UNNEST(ae.embedding) AS component WITH OFFSET AS idx
        WHERE ae.quality_rating >= 4.5
        GROUP BY idx
    ) AS per_index
);

-- Score articles from the last 7 days by similarity to the reference vector.
-- NOTE(review): `CURRENT_DATE() - 7` presumes published_date is a DATE column — confirm.
SELECT
    article_id,
    title,
    AI.SIMILARITY(
        reference_embedding,
        embedding
    ) AS quality_similarity
FROM article_embeddings
WHERE published_date >= CURRENT_DATE() - 7
ORDER BY quality_similarity DESC;
-- Compare content against high-quality reference examples.
DECLARE reference_embedding ARRAY<FLOAT64>;

-- Average of multiple good examples. AVG() does not take an ARRAY argument,
-- so the mean is computed element-wise: each embedding is unnested together
-- with its element index, values are averaged per index, and the averages are
-- reassembled into a single array in index order.
SET reference_embedding = (
    SELECT ARRAY_AGG(per_index.mean_value ORDER BY per_index.idx)
    FROM (
        SELECT
            idx,
            AVG(component) AS mean_value
        FROM article_embeddings AS ae
        CROSS JOIN UNNEST(ae.embedding) AS component WITH OFFSET AS idx
        WHERE ae.quality_rating >= 4.5
        GROUP BY idx
    ) AS per_index
);

-- Score articles from the last 7 days by similarity to the reference vector.
-- NOTE(review): `CURRENT_DATE() - 7` presumes published_date is a DATE column — confirm.
SELECT
    article_id,
    title,
    AI.SIMILARITY(
        reference_embedding,
        embedding
    ) AS quality_similarity
FROM article_embeddings
WHERE published_date >= CURRENT_DATE() - 7
ORDER BY quality_similarity DESC;
-- Compare content against high-quality reference examples.
DECLARE reference_embedding ARRAY<FLOAT64>;

-- Average of multiple good examples. AVG() does not take an ARRAY argument,
-- so the mean is computed element-wise: each embedding is unnested together
-- with its element index, values are averaged per index, and the averages are
-- reassembled into a single array in index order.
SET reference_embedding = (
    SELECT ARRAY_AGG(per_index.mean_value ORDER BY per_index.idx)
    FROM (
        SELECT
            idx,
            AVG(component) AS mean_value
        FROM article_embeddings AS ae
        CROSS JOIN UNNEST(ae.embedding) AS component WITH OFFSET AS idx
        WHERE ae.quality_rating >= 4.5
        GROUP BY idx
    ) AS per_index
);

-- Score articles from the last 7 days by similarity to the reference vector.
-- NOTE(review): `CURRENT_DATE() - 7` presumes published_date is a DATE column — confirm.
SELECT
    article_id,
    title,
    AI.SIMILARITY(
        reference_embedding,
        embedding
    ) AS quality_similarity
FROM article_embeddings
WHERE published_date >= CURRENT_DATE() - 7
ORDER BY quality_similarity DESC;
Example 6: Multi-Query Search
-- Search with multiple related queries and aggregate results.

-- query_embeddings: one embedding per related query phrase.
-- NOTE(review): assumes AI.EMBED yields a value AI.SIMILARITY accepts directly;
-- confirm against the function's documented return type.
WITH query_embeddings AS (
    SELECT
        AI.EMBED(
            model => 'text-embedding-004',
            content => query_text,
            task_type => 'RETRIEVAL_QUERY',
            connection_id => 'us.my_vertex_connection'
        ) AS embedding
    FROM UNNEST([
        'wireless headphones',
        'bluetooth earbuds',
        'noise cancelling audio'
    ]) AS query_text
),

-- scores: each product's best similarity across all query phrases.
scores AS (
    SELECT
        p.product_id,
        p.product_name,
        MAX(AI.SIMILARITY(q.embedding, p.product_embedding)) AS max_similarity
    FROM products_with_embeddings AS p
    CROSS JOIN query_embeddings AS q
    GROUP BY p.product_id, p.product_name
)

-- Top 20 products by best-matching query.
SELECT
    product_id,
    product_name,
    max_similarity
FROM scores
ORDER BY max_similarity DESC
LIMIT 20;
-- Search with multiple related queries and aggregate results.

-- query_embeddings: one embedding per related query phrase.
-- NOTE(review): assumes AI.EMBED yields a value AI.SIMILARITY accepts directly;
-- confirm against the function's documented return type.
WITH query_embeddings AS (
    SELECT
        AI.EMBED(
            model => 'text-embedding-004',
            content => query_text,
            task_type => 'RETRIEVAL_QUERY',
            connection_id => 'us.my_vertex_connection'
        ) AS embedding
    FROM UNNEST([
        'wireless headphones',
        'bluetooth earbuds',
        'noise cancelling audio'
    ]) AS query_text
),

-- scores: each product's best similarity across all query phrases.
scores AS (
    SELECT
        p.product_id,
        p.product_name,
        MAX(AI.SIMILARITY(q.embedding, p.product_embedding)) AS max_similarity
    FROM products_with_embeddings AS p
    CROSS JOIN query_embeddings AS q
    GROUP BY p.product_id, p.product_name
)

-- Top 20 products by best-matching query.
SELECT
    product_id,
    product_name,
    max_similarity
FROM scores
ORDER BY max_similarity DESC
LIMIT 20;
-- Search with multiple related queries and aggregate results.

-- query_embeddings: one embedding per related query phrase.
-- NOTE(review): assumes AI.EMBED yields a value AI.SIMILARITY accepts directly;
-- confirm against the function's documented return type.
WITH query_embeddings AS (
    SELECT
        AI.EMBED(
            model => 'text-embedding-004',
            content => query_text,
            task_type => 'RETRIEVAL_QUERY',
            connection_id => 'us.my_vertex_connection'
        ) AS embedding
    FROM UNNEST([
        'wireless headphones',
        'bluetooth earbuds',
        'noise cancelling audio'
    ]) AS query_text
),

-- scores: each product's best similarity across all query phrases.
scores AS (
    SELECT
        p.product_id,
        p.product_name,
        MAX(AI.SIMILARITY(q.embedding, p.product_embedding)) AS max_similarity
    FROM products_with_embeddings AS p
    CROSS JOIN query_embeddings AS q
    GROUP BY p.product_id, p.product_name
)

-- Top 20 products by best-matching query.
SELECT
    product_id,
    product_name,
    max_similarity
FROM scores
ORDER BY max_similarity DESC
LIMIT 20;
Data Output Examples
Product Similarity Search
| product_name | similarity_score |
| --- | --- |
| "Sony WH-1000XM5 Noise Cancelling Headphones" | 0.94 |
| "Bose QuietComfort 45 Wireless" | 0.91 |
| "Apple AirPods Max" | 0.88 |
| "Sennheiser Momentum 4" | 0.85 |
Duplicate Detection
| article_id_1 | article_id_2 | similarity |
| --- | --- | --- |
| A001 | A045 | 0.98 |
| A023 | A089 | 0.97 |
| A012 | A034 | 0.96 |
Similarity Score Interpretation
0.95 - 1.0: Near duplicates or very highly related
*dbt® and dbt Core® are federally registered trademarks of dbt Labs, Inc. in the United States and various jurisdictions around the world. Paradime is not a partner of dbt Labs. All rights therein are reserved to dbt Labs. Paradime is not a product or service of or endorsed by dbt Labs, Inc.
*dbt® and dbt Core® are federally registered trademarks of dbt Labs, Inc. in the United States and various jurisdictions around the world. Paradime is not a partner of dbt Labs. All rights therein are reserved to dbt Labs. Paradime is not a product or service of or endorsed by dbt Labs, Inc.
*dbt® and dbt Core® are federally registered trademarks of dbt Labs, Inc. in the United States and various jurisdictions around the world. Paradime is not a partner of dbt Labs. All rights therein are reserved to dbt Labs. Paradime is not a product or service of or endorsed by dbt Labs, Inc.