sampling column sensors
column samples
Full sensor name
DescriptionColumn level sensor that retrieves a column value samples. Column value sampling is used in profiling and in capturing error samples for failed data quality checks.
Parameters
Field name | Description | Allowed data type | Is it required? | Allowed values |
---|---|---|---|---|
limit | The limit of results that are returned. The default value is 10 sample values with the highest count (the most popular). | integer |
SQL Template (Jinja2)
{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}
WITH column_samples AS (
SELECT
unlimited_samples.sample_value AS sample_value,
unlimited_samples.sample_count AS sample_count,
ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
FROM
(
SELECT
{{ lib.render_target_column('analyzed_table') }} AS sample_value,
COUNT(*) AS sample_count
FROM
{{ lib.render_target_table() }} AS analyzed_table
GROUP BY sample_value
{{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = ' ') }}
) AS unlimited_samples
)
SELECT
sample_table.sample_value AS actual_value,
sample_table.sample_count AS sample_count,
sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/mysql.sql.jinja2' as lib with context -%}
WITH column_samples AS (
SELECT
unlimited_samples.sample_value AS sample_value,
unlimited_samples.sample_count AS sample_count,
ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
FROM
(
SELECT
{{ lib.render_target_column('analyzed_table') }} AS sample_value,
COUNT(*) AS sample_count
FROM
{{ lib.render_target_table() }} AS analyzed_table
GROUP BY sample_value
{{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = ' ') }}
) AS unlimited_samples
)
SELECT
sample_table.sample_value AS actual_value,
sample_table.sample_count AS sample_count,
sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}
WITH column_samples AS (
SELECT
unlimited_samples.sample_value AS sample_value,
unlimited_samples.sample_count AS sample_count,
ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
FROM
(
SELECT
{{ lib.render_target_column('analyzed_table') }} AS sample_value,
COUNT(*) AS sample_count
FROM
{{ lib.render_target_table() }} AS analyzed_table
GROUP BY sample_value
{{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = ' ') }}
) AS unlimited_samples
)
SELECT
sample_table.sample_value AS actual_value,
sample_table.sample_count AS sample_count,
sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/redshift.sql.jinja2' as lib with context -%}
WITH column_samples AS (
SELECT
unlimited_samples.sample_value AS sample_value,
unlimited_samples.sample_count AS sample_count,
ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
FROM
(
SELECT
{{ lib.render_target_column('analyzed_table') }} AS sample_value,
COUNT(*) AS sample_count
FROM
{{ lib.render_target_table() }} AS analyzed_table
GROUP BY sample_value
{{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = ' ') }}
) AS unlimited_samples
)
SELECT
sample_table.sample_value AS actual_value,
sample_table.sample_count AS sample_count,
sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}
WITH column_samples AS (
SELECT
unlimited_samples.sample_value AS sample_value,
unlimited_samples.sample_count AS sample_count,
ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
FROM
(
SELECT
{{ lib.render_target_column('analyzed_table') }} AS sample_value,
COUNT(*) AS sample_count
FROM
{{ lib.render_target_table() }} AS analyzed_table
GROUP BY sample_value
{{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = ' ') }}
) AS unlimited_samples
)
SELECT
sample_table.sample_value AS actual_value,
sample_table.sample_count AS sample_count,
sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC
{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}
WITH column_samples AS (
SELECT
unlimited_samples.sample_value AS sample_value,
unlimited_samples.sample_count AS sample_count,
ROW_NUMBER() OVER (ORDER BY unlimited_samples.sample_count DESC) AS sample_index
FROM
(
SELECT
{{ lib.render_target_column('analyzed_table') }} AS sample_value,
COUNT(*) AS sample_count
FROM
{{ lib.render_target_table() }} AS analyzed_table
GROUP BY {{ lib.render_target_column('analyzed_table') }}
{{- lib.render_where_clause(table_alias_prefix = 'analyzed_table', indentation = ' ') }}
) AS unlimited_samples
)
SELECT
sample_table.sample_value AS actual_value,
sample_table.sample_count AS sample_count,
sample_table.sample_index AS sample_index
FROM column_samples AS sample_table
WHERE sample_table.sample_index <= {{ parameters.limit }}
ORDER BY sample_index DESC