Skip to content

Last updated: July 22, 2025

DQOps data quality sampling sensors, SQL examples

All data quality sensors in the sampling category supported by DQOps are listed below. Those sensors are measured on a column level.


column samples

Column level sensor that retrieves a column value samples. Column value sampling is used in profiling and in capturing error samples for failed data quality checks.

Sensor summary

The column samples sensor is documented below.

Target Category Full sensor name Source code on GitHub
column sampling column/sampling/column_samples sensors/column/sampling

Sensor parameters

Field name Description Allowed data type Required Allowed values
limit The limit of results that are returned. The default value is 100 sample values with the highest count (the most popular). integer

Jinja2 SQL templates

The templates used to generate the SQL query for each data source supported by DQOps is shown below.

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/clickhouse.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/databricks.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/db2.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY {{ lib.render_target_column('analyzed_table') }}
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_table.sample_index DESC
{% import '/dialects/duckdb.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/hana.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY {{ lib.render_target_column('analyzed_table') }}
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_table.sample_index DESC
{% import '/dialects/mariadb.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/mysql.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/oracle.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY {{ lib.render_target_column('analyzed_table') }}
    ) unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_table.sample_index DESC
{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/presto.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY 1
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_table.sample_index DESC
{% import '/dialects/questdb.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT() AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/redshift.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/spark.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY {{ lib.render_target_column('analyzed_table') }}
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/teradata.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY sample_value
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/trino.sql.jinja2' as lib with context -%}
WITH column_samples AS (
    SELECT
        unlimited_samples.sample_value AS sample_value,
        unlimited_samples.sample_count AS sample_count,
        ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
    FROM
    (
        SELECT
            {{ lib.render_target_column('analyzed_table') }} AS sample_value,
            COUNT(*) AS sample_count
        FROM
            {{ lib.render_target_table() }} AS analyzed_table
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = '        ') }}
        GROUP BY 1
    ) AS unlimited_samples
)
SELECT
   sample_table.sample_value AS actual_value,
   sample_table.sample_count AS sample_count,
   sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_table.sample_index DESC

What's next