Skip to content

Last updated: July 22, 2025

DQOps data quality uniqueness sensors, SQL examples

All data quality sensors in the uniqueness category supported by DQOps are listed below. Those sensors are measured on a table level.


duplicate record count

Table sensor that executes a duplicate record count query.

Sensor summary

The duplicate record count sensor is documented below.

Target Category Full sensor name Source code on GitHub
table uniqueness table/uniqueness/duplicate_record_count sensors/table/uniqueness

Sensor parameters

Field name Description Allowed data type Required Allowed values
columns A list of columns used for uniqueness record duplicate verification. string_list

Jinja2 SQL templates

The templates used to generate the SQL query for each data source supported by DQOps is shown below.

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}

{%- macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro -%}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS STRING)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/clickhouse.sql.jinja2' as lib with context -%}

{%- macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro -%}

SELECT
    CASE
        WHEN sumOrNull(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='toString(', column_suffix=')') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/databricks.sql.jinja2' as lib with context -%}

{%- macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro -%}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS STRING)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/db2.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
    FROM (
        SELECT COUNT(*) AS duplicated_count
        {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='            ') }}
        {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='            ') }}
        FROM (
            SELECT
                {{ extract_in_list(parameters.columns) -}}
                {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
                {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
            FROM {{ lib.render_target_table() }} analyzed_table_nested
            {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='            ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR(4000))') ~ ') IS NOT NULL') }}
    ) analyzed_table
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/duckdb.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST( ', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/hana.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
    FROM (
        SELECT COUNT(*) AS duplicated_count
        {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='        ') }}
        {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='        ') }}
        FROM (
            SELECT
                {{ extract_in_list(parameters.columns) -}}
                {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
                {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
            FROM {{ lib.render_target_table() }} AS analyzed_table_nested
            {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='        ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    )
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/mariadb.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns) ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/mysql.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns) ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/oracle.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
    FROM (
        SELECT COUNT(*) AS duplicated_count
        {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='            ') }}
        {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='            ') }}
        FROM (
            SELECT
                {{ extract_in_list(parameters.columns) -}}
                {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
                {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
            FROM {{ lib.render_target_table() }} analyzed_table_nested
            {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='            ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR(4000))') ~ ') IS NOT NULL') }}
    ) analyzed_table
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_suffix='::VARCHAR') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/presto.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
    FROM (
        SELECT COUNT(*) AS duplicated_count
        {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='        ') }}
        {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='        ') }}
        FROM (
            SELECT
                {{ extract_in_list(parameters.columns) -}}
                {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
                {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
            FROM {{ lib.render_target_table() }} AS analyzed_table_nested
            {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='        ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    )
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/questdb.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    COALESCE(
        SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        , 0) AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT() AS duplicated_count
        {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='        ') }}
        {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='        ') }}
    FROM (
         SELECT
             {{ extract_in_list(parameters.columns) -}}
             {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
             {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
         FROM {{ lib.render_target_table() }} AS analyzed_table_nested
         {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='        ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    )
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/redshift.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_suffix='::VARCHAR') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS STRING)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/spark.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS STRING)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}

{%- macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro -%}

{% macro render_group_by(table_alias_prefix = 'grouping_table', indentation = '    ') %}
    {%- if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none -%}
    GROUP BY
    {%- endif -%}
    {%- if lib.data_groupings is not none and (lib.data_groupings | length()) > 0 -%}
        {%- for attribute in lib.data_groupings -%}
            {{- ',' if not loop.first -}}{{- lib.eol() }}
            {{ indentation }}{{ table_alias_prefix }}.grouping_{{ attribute -}}
        {%- endfor -%}
    {%- endif -%}
    {%- if lib.time_series is not none -%}
        {{ ',' if lib.data_groupings is not none and (lib.data_groupings | length()) > 0 -}}{{- lib.eol() -}}
        {{ indentation }}time_period,{{ lib.eol() -}}
        {{ indentation }}time_period_utc
    {%- endif -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{ render_group_by('grouping_table') }}
{% import '/dialects/teradata.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS duplicated_count
    {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
    {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR(4096))') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/trino.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE
        WHEN SUM(duplicated_count) IS NULL THEN 0
        ELSE SUM(CASE WHEN duplicated_count > 1 THEN 1 ELSE 0 END)
        END AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
    FROM (
        SELECT COUNT(*) AS duplicated_count
        {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='        ') }}
        {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='        ') }}
        FROM (
            SELECT
                {{ extract_in_list(parameters.columns) -}}
                {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
                {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
            FROM {{ lib.render_target_table() }} AS analyzed_table_nested
            {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='        ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    )
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}

duplicate record percent

Table sensor that executes a duplicate record percent query.

Sensor summary

The duplicate record percent sensor is documented below.

Target Category Full sensor name Source code on GitHub
table uniqueness table/uniqueness/duplicate_record_percent sensors/table/uniqueness

Sensor parameters

Field name Description Allowed data type Required Allowed values
columns A list of columns used for uniqueness record duplicate verification. string_list

Jinja2 SQL templates

The templates used to generate the SQL query for each data source supported by DQOps is shown below.

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}

{%- macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro -%}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS STRING)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/clickhouse.sql.jinja2' as lib with context -%}

{%- macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro -%}

SELECT
    CASE WHEN sumOrNull(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='toString(', column_suffix=')') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/databricks.sql.jinja2' as lib with context -%}

{%- macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro -%}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS STRING)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/db2.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) * 1.0 / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
    FROM (
        SELECT COUNT(*) AS records_number,
            COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
            {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='            ') }}
            {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='            ') }}
        FROM (
            SELECT
                {{ extract_in_list(parameters.columns) -}}
                {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
                {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
            FROM {{ lib.render_target_table() }} analyzed_table_nested
            {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='            ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR(4000))') ~ ') IS NOT NULL') }}
    ) analyzed_table
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/duckdb.sql.jinja2' as lib with context -%}

{%- macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro -%}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST( ', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/hana.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
    {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='        ') }}
    {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='        ') }}
    FROM (
        SELECT
            {{ extract_in_list(parameters.columns) -}}
            {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
            {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
        FROM {{ lib.render_target_table() }} AS analyzed_table_nested
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='        ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    )
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/mariadb.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns) ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/mysql.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns) ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/oracle.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
    FROM (
        SELECT COUNT(*) AS records_number,
            COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
            {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='            ') }}
            {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='            ') }}
        FROM (
            SELECT
                {{ extract_in_list(parameters.columns) -}}
                {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
                {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
            FROM {{ lib.render_target_table() }} analyzed_table_nested
            {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='            ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR(4000))') ~ ') IS NOT NULL') }}
    ) analyzed_table
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_suffix='::VARCHAR') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/presto.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / CAST(SUM(records_number) AS DOUBLE)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='        ') }}
        {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='        ') }}
    FROM (
        SELECT
            {{ extract_in_list(parameters.columns) -}}
            {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
            {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
        FROM {{ lib.render_target_table() }} AS analyzed_table_nested
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='        ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    )
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/questdb.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}}
        {%- if not loop.last -%}
            {{- ", " if separate_by_comma else " || " -}}
        {%- endif -%}
    {%- endfor -%}
{% endmacro %}

SELECT
    COALESCE(
        (1 - SUM(distinct_records) * 1.0 / SUM(records_number)) * 100.0
        , 0) AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT() AS records_number,
        COUNT_DISTINCT({{ extract_in_list(parameters.columns) -}}) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_suffix='::VARCHAR', separate_by_comma=true) ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns, separate_by_comma=true) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/redshift.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_suffix='::VARCHAR') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS STRING)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/spark.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS STRING)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}

{%- macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro -%}

{% macro render_group_by(table_alias_prefix = 'grouping_table', indentation = '    ') %}
    {%- if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none -%}
    GROUP BY
    {%- endif -%}
    {%- if lib.data_groupings is not none and (lib.data_groupings | length()) > 0 -%}
        {%- for attribute in lib.data_groupings -%}
            {{- ',' if not loop.first -}}{{- lib.eol() }}
            {{ indentation }}{{ table_alias_prefix }}.grouping_{{ attribute -}}
        {%- endfor -%}
    {%- endif -%}
    {%- if lib.time_series is not none -%}
        {{ ',' if lib.data_groupings is not none and (lib.data_groupings | length()) > 0 -}}{{- lib.eol() -}}
        {{ indentation }}time_period,{{ lib.eol() -}}
        {{ indentation }}time_period_utc
    {%- endif -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) * 1.0 / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{ render_group_by('grouping_table') }}
{% import '/dialects/teradata.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) * 1.0 / SUM(records_number)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
        {{- lib.render_data_grouping_projections('analyzed_table', indentation='        ') }}
        {{- lib.render_time_dimension_projection('analyzed_table', indentation='        ') }}
    FROM {{ lib.render_target_table() }} AS analyzed_table
    {{- lib.render_where_clause(indentation='    ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR(4096))') ~ ') IS NOT NULL') }}
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/trino.sql.jinja2' as lib with context -%}

{% macro extract_in_list(values_list, column_prefix = none, column_suffix = none, separate_by_comma = false) %}
    {%- set column_names = table.columns if values_list is none or (values_list | length()) == 0 else values_list -%}
    {%- for item in column_names -%}
        {{ (column_prefix) if column_prefix is not none -}} {{- lib.quote_identifier(item) -}} {{- (column_suffix) if column_suffix is not none -}} {{- ", " if not loop.last }} {{- "', ', " if separate_by_comma and not loop.last }}
    {%- endfor -%}
{% endmacro %}

SELECT
    CASE WHEN SUM(distinct_records) IS NULL THEN 0
        ELSE (1 - SUM(distinct_records) / CAST(SUM(records_number) AS DOUBLE)) * 100.0 END
        AS actual_value
    {{- lib.render_data_grouping_projections_reference('grouping_table') }}
    {{- lib.render_time_dimension_projection_reference('grouping_table') }}
FROM (
    SELECT COUNT(*) AS records_number,
        COUNT(*) OVER (PARTITION BY {{ extract_in_list(parameters.columns) -}} ) AS distinct_records
    {{- lib.render_data_grouping_projections_reference('analyzed_table_nested', indentation='        ') }}
    {{- lib.render_time_dimension_projection_reference('analyzed_table_nested', indentation='        ') }}
    FROM (
        SELECT
            {{ extract_in_list(parameters.columns) -}}
            {{- lib.render_data_grouping_projections('analyzed_table_nested', indentation='            ') }}
            {{- lib.render_time_dimension_projection('analyzed_table_nested', indentation='            ') }}
        FROM {{ lib.render_target_table() }} AS analyzed_table_nested
        {{- lib.render_where_clause(table_alias_prefix = 'analyzed_table_nested', indentation='        ', extra_filter = 'COALESCE(' ~ extract_in_list(parameters.columns, column_prefix='CAST(', column_suffix=' AS VARCHAR)') ~ ') IS NOT NULL') }}
    )
    GROUP BY {{ extract_in_list(parameters.columns) -}} {{- (", " ~ lib.render_grouping_column_names()) if (lib.data_groupings is not none and (lib.data_groupings | length()) > 0) or lib.time_series is not none }}
) grouping_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}

What's next