SQL Keywords

AI_TRANSCRIBE

Feb 23, 2026

min read

AI_TRANSCRIBE

Overview

Transcribes audio and video files stored in a stage, extracting text, timestamps, and speaker information.

Syntax

AI_TRANSCRIBE(
  file_reference,
  [options]
)

AI_TRANSCRIBE(
  file_reference,
  [options]
)

AI_TRANSCRIBE(
  file_reference,
  [options]
)

Parameters

file_reference (FILE): Reference to an audio or video file in a stage (created with TO_FILE)
options (OBJECT): Optional settings for language, speaker detection, timestamps

Use Cases

Meeting transcription
Call center analysis
Video content indexing
Podcast transcription
Accessibility (captions)
Compliance and documentation

Supported File Formats

Audio: MP3, WAV, M4A, AAC, FLAC
Video: MP4, MOV, AVI, MKV

Code Examples

Example 1: Basic Audio Transcription

-- Transcribe every MP3 file listed in the directory table of audio_stage.
-- Directory tables expose each file's path in RELATIVE_PATH; it is aliased
-- to file_name for readability in the result set.
-- Each call is billed per second of audio, so run this once per file.
SELECT
    relative_path AS file_name,
    AI_TRANSCRIBE(
        TO_FILE('@audio_stage', relative_path)
    ) AS transcription
FROM DIRECTORY(@audio_stage)
WHERE relative_path LIKE '%.mp3';

Output:

{
  "text": "Welcome to today's meeting. Let's discuss the Q4 roadmap.",
  "duration": 125.5
}

{
  "text": "Welcome to today's meeting. Let's discuss the Q4 roadmap.",
  "duration": 125.5
}

{
  "text": "Welcome to today's meeting. Let's discuss the Q4 roadmap.",
  "duration": 125.5
}

Example 2: Transcription with Timestamps

-- Transcribe a single meeting recording and request timestamped segments.
-- The options OBJECT selects the timestamp granularity ('word' here).
SELECT
    AI_TRANSCRIBE(
        TO_FILE('@meetings', 'team_meeting_2024_01.mp4'),
        {'timestamp_granularity': 'word'}
    ) AS transcription;

Output:

{
  "segments": [
    {"start": 0.0, "end": 3.5, "text": "Good morning everyone"},
    {"start": 3.5, "end": 8.2, "text": "Let's review the quarterly results"},
    {"start": 8.2, "end": 15.1, "text": "Revenue increased by 23 percent"}
  ],
  "duration": 1825.3
}

{
  "segments": [
    {"start": 0.0, "end": 3.5, "text": "Good morning everyone"},
    {"start": 3.5, "end": 8.2, "text": "Let's review the quarterly results"},
    {"start": 8.2, "end": 15.1, "text": "Revenue increased by 23 percent"}
  ],
  "duration": 1825.3
}

{
  "segments": [
    {"start": 0.0, "end": 3.5, "text": "Good morning everyone"},
    {"start": 3.5, "end": 8.2, "text": "Let's review the quarterly results"},
    {"start": 8.2, "end": 15.1, "text": "Revenue increased by 23 percent"}
  ],
  "duration": 1825.3
}

Example 3: Speaker Diarization

-- Transcribe the last seven days of meeting recordings with speaker labels.
-- 'speaker' granularity returns segments that carry both a speaker label
-- and start/end timestamps.
SELECT
    meeting_id,
    AI_TRANSCRIBE(
        TO_FILE('@recordings', filename),
        {'timestamp_granularity': 'speaker'}
    ) AS transcription
FROM meeting_recordings
WHERE meeting_date >= CURRENT_DATE - 7;

Output:

{
  "segments": [
    {"speaker": "Speaker 1", "start": 0.0, "end": 5.2, "text": "Let's begin the standup"},
    {"speaker": "Speaker 2", "start": 5.5, "end": 12.3, "text": "I completed the API integration"},
    {"speaker": "Speaker 1", "start": 12.5, "end": 18.0, "text": "Great work. What's next?"}
  ]
}

{
  "segments": [
    {"speaker": "Speaker 1", "start": 0.0, "end": 5.2, "text": "Let's begin the standup"},
    {"speaker": "Speaker 2", "start": 5.5, "end": 12.3, "text": "I completed the API integration"},
    {"speaker": "Speaker 1", "start": 12.5, "end": 18.0, "text": "Great work. What's next?"}
  ]
}

{
  "segments": [
    {"speaker": "Speaker 1", "start": 0.0, "end": 5.2, "text": "Let's begin the standup"},
    {"speaker": "Speaker 2", "start": 5.5, "end": 12.3, "text": "I completed the API integration"},
    {"speaker": "Speaker 1", "start": 12.5, "end": 18.0, "text": "Great work. What's next?"}
  ]
}

Example 4: Multilingual Transcription

-- Transcribe a Spanish-language podcast episode.
-- NOTE(review): confirm that 'language' is an accepted options key for
-- AI_TRANSCRIBE in your account; if it is not, drop the options argument
-- and rely on the function's own language handling.
SELECT
    AI_TRANSCRIBE(
        TO_FILE('@podcasts', 'episode_45.mp3'),
        {'language': 'es'}  -- Spanish
    ) AS transcription;

Example 5: Batch Processing Call Recordings

-- Build call_transcriptions from the last 30 days of calls that have not
-- been transcribed yet, with speaker-labelled segments.
-- CREATE TABLE ... AS fails if the table already exists, so run this once.
CREATE TABLE call_transcriptions AS
SELECT
    call_id,
    customer_id,
    call_date,
    AI_TRANSCRIBE(
        TO_FILE('@call_recordings', recording_file),
        {'timestamp_granularity': 'speaker'}
    ) AS transcription,
    CURRENT_TIMESTAMP() AS transcribed_at
FROM customer_calls
-- Qualified on purpose: the filter must read the source column, not the
-- transcribed_at alias defined in the SELECT list above.
-- NOTE(review): assumes customer_calls has a transcribed_at column; confirm.
WHERE customer_calls.transcribed_at IS NULL
  AND call_date >= CURRENT_DATE - 30;

Data Output Examples

Customer Service Call

Input: 5-minute customer support call
Output:
{
  "duration": 305.2,
  "segments": [
    {"speaker": "Agent", "text": "Thank you for calling support. How can I help?"},
    {"speaker": "Customer", "text": "I'm having trouble logging into my account"},
    {"speaker": "Agent", "text": "I can help with that. What error message do you see?"}
  ]
}

Input: 5-minute customer support call
Output:
{
  "duration": 305.2,
  "segments": [
    {"speaker": "Agent", "text": "Thank you for calling support. How can I help?"},
    {"speaker": "Customer", "text": "I'm having trouble logging into my account"},
    {"speaker": "Agent", "text": "I can help with that. What error message do you see?"}
  ]
}

Input: 5-minute customer support call
Output:
{
  "duration": 305.2,
  "segments": [
    {"speaker": "Agent", "text": "Thank you for calling support. How can I help?"},
    {"speaker": "Customer", "text": "I'm having trouble logging into my account"},
    {"speaker": "Agent", "text": "I can help with that. What error message do you see?"}
  ]
}

Podcast Episode

Input: 45-minute podcast episode
Output:
{
  "duration": 2701.8,
  "text": "Welcome to Tech Talk. Today we're discussing artificial intelligence...",
  "word_count": 7250
}

Input: 45-minute podcast episode
Output:
{
  "duration": 2701.8,
  "text": "Welcome to Tech Talk. Today we're discussing artificial intelligence...",
  "word_count": 7250
}

Input: 45-minute podcast episode
Output:
{
  "duration": 2701.8,
  "text": "Welcome to Tech Talk. Today we're discussing artificial intelligence...",
  "word_count": 7250
}

Model Information

Billing: 50 tokens per second of audio
Languages: Multiple languages supported
Speaker Detection: Up to 10 speakers

Limitations & Considerations

File Size and Duration

Maximum file size varies by region
Longer files take more time to process
Consider splitting very long recordings

Audio Quality

Higher audio quality produces more accurate transcriptions
Background noise can affect accuracy
Clear speech improves results

Cost

Billed at 50 tokens per second of audio
Example: a 10-minute file is 600 seconds × 50 tokens per second = 30,000 tokens
Speaker detection may increase cost

Processing Time

Not real-time; designed for batch processing
Processing time scales with file duration
Use async patterns for large batches

Regional Availability

AWS US West 2: ✓
AWS US East 1: ✓
Azure East US 2: ✓
EU regions: ✓

Best Practices

1. Create Dedicated Stages

-- Dedicated internal stage for audio files.
-- The directory table lets queries enumerate staged files, and server-side
-- encryption (SNOWFLAKE_SSE) keeps the files readable by server-side functions.
CREATE STAGE IF NOT EXISTS audio_transcriptions
  DIRECTORY = (ENABLE = TRUE)
  ENCRYPTION = (TYPE = 'SNOWFLAKE_SSE');

-- Upload uncompressed: the audio must stay in its original format.
PUT file://local_audio/*.mp3 @audio_transcriptions AUTO_COMPRESS = FALSE;

-- Sync the directory table so the newly uploaded files become visible.
ALTER STAGE audio_transcriptions REFRESH;

2. Store Results in Tables

-- Persistent store for transcription results, one row per recording.
CREATE TABLE IF NOT EXISTS meeting_transcripts (
    meeting_id VARCHAR,
    recording_file VARCHAR,
    transcription VARIANT,  -- raw JSON returned by AI_TRANSCRIBE
    transcription_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP()
);

-- Explicit column list: transcription_date is omitted so its default applies.
-- NOTE(review): meeting_recordings (meeting_id, filename) is assumed as the
-- source table; replace it with the table that tracks your recordings.
INSERT INTO meeting_transcripts (meeting_id, recording_file, transcription)
SELECT
    meeting_id,
    filename,
    AI_TRANSCRIBE(TO_FILE('@meetings', filename))
FROM meeting_recordings;

3. Extract and Analyze Transcriptions

-- Extract the transcript text and audio length from the stored JSON.
-- NOTE(review): the duration key is read as audio_duration; confirm against
-- the JSON your account actually returns.
SELECT
    meeting_id,
    transcription:text::STRING AS full_text,
    transcription:audio_duration::FLOAT AS duration_seconds
FROM meeting_transcripts;

-- Analyze with other AI functions.
-- AI_EXTRACT takes named arguments; responseFormat maps an output label to
-- the question that should be answered from the text.
SELECT
    meeting_id,
    AI_SENTIMENT(transcription:text::STRING) AS meeting_sentiment,
    AI_EXTRACT(
        text => transcription:text::STRING,
        responseFormat => {'key_outcomes': 'Extract action items and decisions made'}
    ) AS key_outcomes
FROM meeting_transcripts;

4. Process in Batches

-- Process recordings in manageable batches: a scheduled task that
-- transcribes up to 100 queued files per run.
-- NOTE(review): nothing in this task removes or marks rows in
-- pending_transcriptions, and LIMIT without ORDER BY picks an arbitrary 100;
-- confirm that another process maintains the queue, otherwise the same files
-- may be transcribed (and billed) again on later runs.
-- NOTE(review): INSERT has no column list, so it relies on the column order
-- of transcriptions; confirm against that table's definition.
CREATE OR REPLACE TASK transcribe_daily_recordings
  WAREHOUSE = compute_wh
  SCHEDULE = 'USING CRON 0 2 * * * UTC'  -- 2 AM daily
AS
  INSERT INTO transcriptions
  SELECT
      file_url,
      AI_TRANSCRIBE(TO_FILE(file_url))
  FROM pending_transcriptions
  LIMIT 100;  -- Process 100 at a time

-- Tasks are created suspended; resume the task so the schedule takes effect.
ALTER TASK transcribe_daily_recordings RESUME;

Common Use Cases

Meeting Minutes Generation

-- Generate meeting minutes for the last seven days of meetings by passing
-- each stored transcript to an LLM.
-- NOTE(review): this reads meeting_title and meeting_date from
-- meeting_transcripts; confirm those columns exist, or join to the table
-- that holds the meeting metadata.
SELECT
    meeting_id,
    meeting_title,
    AI_COMPLETE(
        'claude-4-sonnet',
        'Create meeting minutes from this transcript: ' || transcription:text::STRING
    ) AS meeting_minutes
FROM meeting_transcripts
WHERE meeting_date >= CURRENT_DATE - 7;

Call Center Quality Analysis

-- Per-agent call quality summary: call volume, average sentiment, and an
-- LLM-generated digest of recurring complaints.
-- SNOWFLAKE.CORTEX.SENTIMENT returns a numeric score, so it can be averaged;
-- AI_SENTIMENT returns a categorical OBJECT, which AVG cannot aggregate.
-- NOTE(review): assumes call_transcriptions carries an agent_id column; confirm.
SELECT
    agent_id,
    COUNT(*) AS calls_analyzed,
    AVG(SNOWFLAKE.CORTEX.SENTIMENT(transcription:text::STRING)) AS avg_sentiment,
    AI_AGG(
        transcription:text::STRING,
        'Identify common customer complaints'
    ) AS common_issues
FROM call_transcriptions
GROUP BY agent_id;

Compliance Monitoring

-- List calls whose transcript does NOT contain the required compliance
-- disclosures.
-- AI_FILTER evaluates a single predicate; PROMPT() substitutes the transcript
-- text into the {0} placeholder to build that predicate per row.
-- Rows where the check evaluates to NULL are not returned by "= FALSE".
SELECT
    call_id,
    AI_FILTER(
        PROMPT(
            'Does this call mention required compliance disclosures? Call transcript: {0}',
            transcription:text::STRING
        )
    ) AS compliance_check
FROM call_transcriptions
WHERE compliance_check = FALSE;

Related Functions

AI_EXTRACT - Extract specific information from transcripts
AI_SUMMARIZE_AGG - Summarize multiple transcripts
AI_SENTIMENT - Analyze transcript sentiment
TO_FILE - Reference files in stages

Interested in Learning More?
Try Out the Free 14-Day Trial

Start free trial

Learn

Feb 18, 2026

BigQuery Global Queries: How to Run Cross-Region SQL in 2026

Analytics

Feb 18, 2026

Context Engineering and AI Quality for Data Teams

Product

Feb 18, 2026

Accelerate Analytics Development with Paradime and Tableau

Product

Feb 18, 2026

Accelerate Analytics Development with Paradime and Tableau

Experience Analytics for the AI-Era

Start your 14-day trial today - it's free and no credit card needed

Start for free

Experience Analytics for the AI-Era

Start your 14-day trial today - it's free and no credit card needed

Start for free

Experience Analytics for the AI-Era

Start your 14-day trial today - it's free and no credit card needed

Start for free

Platform

Resources

ADD-ONs

Industries

About

Legal

Made with ❤️ in San Francisco ・ London

*dbt® and dbt Core® are federally registered trademarks of dbt Labs, Inc. in the United States and various jurisdictions around the world. Paradime is not a partner of dbt Labs. All rights therein are reserved to dbt Labs. Paradime is not a product or service of or endorsed by dbt Labs, Inc.

Start for free

Platform

Resources