SQL Keywords

SQL Keywords

AI_PARSE_DOCUMENT

Feb 23, 2026

·

5

min read

AI_PARSE_DOCUMENT

Overview

Extracts text and layout information from documents using OCR. Supports PDF, Word, images, and other document formats.

Syntax

AI_PARSE_DOCUMENT(
  file_reference,
  mode
)
AI_PARSE_DOCUMENT(
  file_reference,
  mode
)
AI_PARSE_DOCUMENT(
  file_reference,
  mode
)

Parameters

  • file_reference (FILE): Document file on a stage, referenced with TO_FILE

  • mode (VARCHAR): 'OCR' for text extraction, 'LAYOUT' for text with layout info

Modes

OCR Mode

  • Extracts raw text from documents

  • Removes formatting and layout

  • Best for text analysis and search

LAYOUT Mode

  • Preserves document structure

  • Includes position, formatting, tables

  • Best for document understanding

Use Cases

  • Invoice and receipt processing

  • Contract analysis

  • Document digitization

  • Form extraction

  • PDF text extraction

  • Scanned document processing

Code Examples

Example 1: Extract Text from PDF (OCR Mode)

-- Run OCR over every PDF on the @documents stage, one row per file.
-- The directory listing exposes each file's path as relative_path (the same
-- column Example 3 reads); it is aliased to file_name so the result keeps the
-- column name this example originally showed.
SELECT
    relative_path AS file_name,
    AI_PARSE_DOCUMENT(
        TO_FILE('@documents/' || relative_path),
        'OCR'
    ) AS extracted_text
FROM DIRECTORY('@documents')
WHERE relative_path LIKE '%.pdf';

Output:

Invoice
Date: January 15, 2024
Invoice #: INV-2024-001
Total: $1,250.00

Invoice
Date: January 15, 2024
Invoice #: INV-2024-001
Total: $1,250.00

Invoice
Date: January 15, 2024
Invoice #: INV-2024-001
Total: $1,250.00

Example 2: Parse with Layout Information

-- Parse a single contract in LAYOUT mode. The result column holds the
-- page/block structure illustrated in the output that follows.
SELECT
    AI_PARSE_DOCUMENT(
        TO_FILE('@contracts/agreement_2024.pdf'),
        'LAYOUT'
    ) AS parsed_contract;

Output:

{
  "pages": [
    {
      "page_number": 1,
      "blocks": [
        {"type": "heading", "text": "Service Agreement", "position": {"x": 100, "y": 50}},
        {"type": "paragraph", "text": "This agreement is made...", "position": {"x": 50, "y": 120}},
        {"type": "table", "rows": 3, "columns": 4}
      ]
    }
  ]
}
{
  "pages": [
    {
      "page_number": 1,
      "blocks": [
        {"type": "heading", "text": "Service Agreement", "position": {"x": 100, "y": 50}},
        {"type": "paragraph", "text": "This agreement is made...", "position": {"x": 50, "y": 120}},
        {"type": "table", "rows": 3, "columns": 4}
      ]
    }
  ]
}
{
  "pages": [
    {
      "page_number": 1,
      "blocks": [
        {"type": "heading", "text": "Service Agreement", "position": {"x": 100, "y": 50}},
        {"type": "paragraph", "text": "This agreement is made...", "position": {"x": 50, "y": 120}},
        {"type": "table", "rows": 3, "columns": 4}
      ]
    }
  ]
}

Example 3: Process Invoices in Bulk

-- Build invoice_data with one row per PDF on the @invoices stage.
-- AI_PARSE_DOCUMENT is written once, in the CTE, and its result is reused as
-- the input to AI_EXTRACT, instead of issuing a second parse call for the
-- same file (parsing is billed per page processed).
CREATE TABLE invoice_data AS
WITH parsed_invoices AS (
    SELECT
        relative_path AS filename,
        AI_PARSE_DOCUMENT(
            TO_FILE('@invoices/' || relative_path),
            'OCR'
        ) AS invoice_text
    FROM DIRECTORY('@invoices')
    WHERE relative_path LIKE '%.pdf'
)
SELECT
    filename,
    invoice_text,
    AI_EXTRACT(
        invoice_text,
        'Extract invoice number, date, vendor, and total amount'
    ) AS invoice_details
FROM parsed_invoices;

Example 4: Extract Tables from Documents

-- Intended to return the table-type blocks from the first page (pages[0]) of
-- each report parsed in LAYOUT mode.
-- NOTE(review): the statement is cut off after FROM; the source relation that
-- supplies document_id and filename is missing and must be restored.
-- NOTE(review): [?(@.type == 'table')] is a JSONPath-style filter; confirm the
-- target engine accepts it inside a ':' path expression.
SELECT 
    document_id,
    AI_PARSE_DOCUMENT(
        TO_FILE('@reports/' || filename),
        'LAYOUT'
    ):pages[0]:blocks[?(@.type == 'table')] AS tables
FROM

-- NOTE(review): repeated copy of the statement above; same truncation applies.
SELECT 
    document_id,
    AI_PARSE_DOCUMENT(
        TO_FILE('@reports/' || filename),
        'LAYOUT'
    ):pages[0]:blocks[?(@.type == 'table')] AS tables
FROM

-- NOTE(review): repeated copy of the statement above; same truncation applies.
SELECT 
    document_id,
    AI_PARSE_DOCUMENT(
        TO_FILE('@reports/' || filename),
        'LAYOUT'
    ):pages[0]:blocks[?(@.type == 'table')] AS tables
FROM

Example 5: Multi-page Document Processing

-- Count the layout blocks on each page of a multi-page document.
WITH parsed_doc AS (
    -- Parse the contract once; every page below is read from this result.
    SELECT
        AI_PARSE_DOCUMENT(
            TO_FILE('@legal_docs/contract.pdf'),
            'LAYOUT'
        ) AS doc_content
)
SELECT
    page.value:page_number::INT AS page_num,
    page.value:blocks AS page_blocks,
    COUNT(block.value) AS block_count
FROM parsed_doc,
     LATERAL FLATTEN(doc_content:pages) page,    -- one row per page
     LATERAL FLATTEN(page.value:blocks) block    -- one row per block on that page
GROUP BY page_num, page_blocks
ORDER BY page_num;

Data Output Examples

OCR Mode Output

Input: Scanned invoice PDF
Output:
"Invoice
Company: Acme Corporation
Date: 2024-01-15
Invoice Number: INV-001
Items:
- Product A: $500
- Product B: $750
Total: $1,250"

Input: Scanned invoice PDF
Output:
"Invoice
Company: Acme Corporation
Date: 2024-01-15
Invoice Number: INV-001
Items:
- Product A: $500
- Product B: $750
Total: $1,250"

Input: Scanned invoice PDF
Output:
"Invoice
Company: Acme Corporation
Date: 2024-01-15
Invoice Number: INV-001
Items:
- Product A: $500
- Product B: $750
Total: $1,250"

LAYOUT Mode Output

Input: Multi-page contract
Output:
{
  "page_count": 5,
  "pages": [
    {
      "page_number": 1,
      "blocks": [
        {"type": "title", "text": "Employment Agreement"},
        {"type": "paragraph", "text": "This agreement..."},
        {"type": "signature_block"}
      ]
    }
  ]
}
Input: Multi-page contract
Output:
{
  "page_count": 5,
  "pages": [
    {
      "page_number": 1,
      "blocks": [
        {"type": "title", "text": "Employment Agreement"},
        {"type": "paragraph", "text": "This agreement..."},
        {"type": "signature_block"}
      ]
    }
  ]
}
Input: Multi-page contract
Output:
{
  "page_count": 5,
  "pages": [
    {
      "page_number": 1,
      "blocks": [
        {"type": "title", "text": "Employment Agreement"},
        {"type": "paragraph", "text": "This agreement..."},
        {"type": "signature_block"}
      ]
    }
  ]
}

Model Information

  • Billing: Based on number of pages

  • Supported Formats: PDF, DOCX, images (JPG, PNG)

  • OCR Quality: Optimized for printed text

Limitations & Considerations

Document Quality

  • Better scan quality = better OCR results

  • Handwriting may have lower accuracy

  • Complex layouts may need LAYOUT mode

Page Limits

  • Each page counts as 970 tokens for billing

  • Large documents can be expensive

  • Consider extracting specific pages

Cost

  • Billed per page processed

  • LAYOUT mode same cost as OCR mode

  • Use selectively for large documents

Regional Availability

  • AWS US West/East: ✓

  • Azure East US: ✓

  • EU regions: ✓

Best Practices

1. Choose the Right Mode

-- Use OCR for simple text extraction
SELECT AI_PARSE_DOCUMENT(TO_FILE('@docs/simple.pdf'), 'OCR');

-- Use LAYOUT for structured documents
SELECT AI_PARSE_DOCUMENT(TO_FILE('@docs/invoice.pdf'), 'LAYOUT');

2. Combine with AI_EXTRACT

-- Parse document then extract structured data.
-- The CTE holds the OCR text so AI_EXTRACT reads it by name (doc_text).
WITH parsed AS (
    SELECT
        AI_PARSE_DOCUMENT(
            TO_FILE('@contracts/agreement.pdf'),
            'OCR'
        ) AS doc_text
)
SELECT
    AI_EXTRACT(
        doc_text,
        'Extract: party names, start date, end date, payment terms'
    ) AS contract_details
FROM parsed;

3. Cache Parsed Results

-- Store parsed documents to avoid re-processing
-- Each row keeps the file's URL, its OCR result, and the time it was parsed.
-- NOTE(review): the statement is cut off after FROM; the source relation that
-- supplies file_url is missing and must be restored.
CREATE TABLE parsed_documents AS
SELECT 
    file_url,
    AI_PARSE_DOCUMENT(TO_FILE(file_url), 'OCR') AS parsed_text,
    CURRENT_TIMESTAMP() AS parsed_at
FROM

-- Store parsed documents to avoid re-processing
-- NOTE(review): repeated copy of the statement above; same truncation applies.
CREATE TABLE parsed_documents AS
SELECT 
    file_url,
    AI_PARSE_DOCUMENT(TO_FILE(file_url), 'OCR') AS parsed_text,
    CURRENT_TIMESTAMP() AS parsed_at
FROM

-- Store parsed documents to avoid re-processing
-- NOTE(review): repeated copy of the statement above; same truncation applies.
CREATE TABLE parsed_documents AS
SELECT 
    file_url,
    AI_PARSE_DOCUMENT(TO_FILE(file_url), 'OCR') AS parsed_text,
    CURRENT_TIMESTAMP() AS parsed_at
FROM

4. Handle Multi-page Documents

-- Extract specific pages: parse the report once, then index into the pages
-- array (pages[0] is the first page).
WITH parsed_report AS (
    SELECT
        AI_PARSE_DOCUMENT(
            TO_FILE('@docs/report.pdf'),
            'LAYOUT'
        ) AS doc
)
SELECT
    doc:pages[0]:blocks AS first_page_content
FROM parsed_report;

Common Use Cases

Invoice Processing

-- Parse every file on the @invoices stage with OCR and pull out the invoice
-- fields in one pass.
-- The directory listing exposes each file's path as relative_path; it is
-- aliased to filename so the result keeps the original column name.
SELECT
    relative_path AS filename,
    AI_EXTRACT(
        AI_PARSE_DOCUMENT(TO_FILE('@invoices/' || relative_path), 'OCR'),
        'Extract: invoice number, date, vendor name, total amount, line items'
    ) AS invoice_data
FROM DIRECTORY('@invoices');

Contract Analysis

-- Summarize key terms and obligations for every contract in legal_documents.
WITH contracts AS (
    -- OCR each contract file once; the text is reused in the prompt below.
    SELECT
        contract_id,
        AI_PARSE_DOCUMENT(TO_FILE(file_path), 'OCR') AS contract_text
    FROM legal_documents
    WHERE doc_type = 'contract'
)
SELECT
    contract_id,
    AI_COMPLETE(
        'claude-4-sonnet',
        'Identify key terms and obligations in this contract: ' || contract_text
    ) AS analysis
FROM contracts;

Form Processing

-- Parses each form PDF in LAYOUT mode and asks AI_EXTRACT for the filled
-- fields and their values.
-- NOTE(review): the statement is cut off after FROM; the source relation that
-- supplies application_id and form_pdf is missing and must be restored.
SELECT 
    application_id,
    AI_EXTRACT(
        AI_PARSE_DOCUMENT(TO_FILE(form_pdf), 'LAYOUT'),
        'Extract all filled form fields and their values'
    ) AS form_data
FROM

-- NOTE(review): repeated copy of the statement above; same truncation applies.
SELECT 
    application_id,
    AI_EXTRACT(
        AI_PARSE_DOCUMENT(TO_FILE(form_pdf), 'LAYOUT'),
        'Extract all filled form fields and their values'
    ) AS form_data
FROM

-- NOTE(review): repeated copy of the statement above; same truncation applies.
SELECT 
    application_id,
    AI_EXTRACT(
        AI_PARSE_DOCUMENT(TO_FILE(form_pdf), 'LAYOUT'),
        'Extract all filled form fields and their values'
    ) AS form_data
FROM

Related Functions

  • AI_EXTRACT - Extract structured data from parsed text

  • AI_COMPLETE - Analyze document content

  • TO_FILE - Reference document files

Interested in Learning More?
Try Out the Free 14-Day Trial

More Articles

decorative icon

Experience Analytics for the AI-Era

Start your 14-day trial today - it's free and no credit card needed

decorative icon

Experience Analytics for the AI-Era

Start your 14-day trial today - it's free and no credit card needed

decorative icon

Experience Analytics for the AI-Era

Start your 14-day trial today - it's free and no credit card needed

Copyright © 2026 Paradime Labs, Inc.

Made with ❤️ in San Francisco ・ London

*dbt® and dbt Core® are federally registered trademarks of dbt Labs, Inc. in the United States and various jurisdictions around the world. Paradime is not a partner of dbt Labs. All rights therein are reserved to dbt Labs. Paradime is not a product or service of or endorsed by dbt Labs, Inc.

Copyright © 2026 Paradime Labs, Inc.

Made with ❤️ in San Francisco ・ London

*dbt® and dbt Core® are federally registered trademarks of dbt Labs, Inc. in the United States and various jurisdictions around the world. Paradime is not a partner of dbt Labs. All rights therein are reserved to dbt Labs. Paradime is not a product or service of or endorsed by dbt Labs, Inc.

Copyright © 2026 Paradime Labs, Inc.

Made with ❤️ in San Francisco ・ London

*dbt® and dbt Core® are federally registered trademarks of dbt Labs, Inc. in the United States and various jurisdictions around the world. Paradime is not a partner of dbt Labs. All rights therein are reserved to dbt Labs. Paradime is not a product or service of or endorsed by dbt Labs, Inc.