Skip to content

date match format percent

date match format percent checks

Description
Column check that calculates percentage of values that match the date format in a column.


profile date match format percent

Check description
Verifies that the percentage of date values matching the given format in a column does not exceed the minimum accepted percentage.

Check name Check type Time scale Sensor definition Quality rule
profile_date_match_format_percent profiling date_match_format_percent min_percent

Enable check (Shell)
To enable this check provide connection name and check name in check enable command

dqo> check enable -c=connection_name -ch=profile_date_match_format_percent
Run check (Shell)
To run this check provide check name in check run command
dqo> check run -ch=profile_date_match_format_percent
It is also possible to run this check on a specific connection. In order to do this, add the connection name to the below
dqo> check run -c=connection_name -ch=profile_date_match_format_percent
It is additionally feasible to run this check on a specific table. In order to do this, add the table name to the below
dqo> check run -c=connection_name -t=table_name -ch=profile_date_match_format_percent
It is furthermore viable to combine run this check on a specific column. In order to do this, add the column name to the below
dqo> check run -c=connection_name -t=table_name -col=column_name -ch=profile_date_match_format_percent
Check structure (Yaml)
      profiling_checks:
        datatype:
          profile_date_match_format_percent:
            parameters:
              date_formats: DD/MM/YYYY
            warning:
              min_percent: 100.0
            error:
              min_percent: 99.0
            fatal:
              min_percent: 95.0
Sample configuration (Yaml)
# yaml-language-server: $schema=https://cloud.dqo.ai/dqo-yaml-schema/TableYaml-schema.json
apiVersion: dqo/v1
kind: table
spec:
  timestamp_columns:
    event_timestamp_column: col_event_timestamp
    ingestion_timestamp_column: col_inserted_at
  incremental_time_window:
    daily_partitioning_recent_days: 7
    monthly_partitioning_recent_months: 1
  columns:
    target_column:
      profiling_checks:
        datatype:
          profile_date_match_format_percent:
            parameters:
              date_formats: DD/MM/YYYY
            warning:
              min_percent: 100.0
            error:
              min_percent: 99.0
            fatal:
              min_percent: 95.0
      labels:
      - This is the column that is analyzed for data quality issues
    col_event_timestamp:
      labels:
      - optional column that stores the timestamp when the event/transaction happened
    col_inserted_at:
      labels:
      - optional column that stores the timestamp when row was ingested

BigQuery

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$"
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$"
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$"
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        "^(\d{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$"
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST({{ lib.render_target_column('analyzed_table') }} AS STRING), r{{render_date_formats(parameters.date_formats)}}) IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST(analyzed_table.`target_column` AS STRING), r"^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$") IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_TRUNC(CAST(CURRENT_TIMESTAMP() AS DATE), MONTH) AS time_period,
    TIMESTAMP(DATE_TRUNC(CAST(CURRENT_TIMESTAMP() AS DATE), MONTH)) AS time_period_utc
FROM `your-google-project-id`.`<target_schema>`.`<target_table>` AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

MySQL

{% import '/dialects/mysql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table.`target_column`, '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-01 00:00:00') AS time_period,
    FROM_UNIXTIME(UNIX_TIMESTAMP(DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-01 00:00:00'))) AS time_period_utc
FROM `<target_table>` AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Oracle

{% import '/dialects/oracle.sql.jinja2' as lib with context -%}

  {% macro render_date_formats(date_formats) %}
  {%- if date_formats == 'DD/MM/YYYY'-%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
  {%- elif date_formats == 'DD-MM-YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
  {%- elif date_formats == 'DD.MM.YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
  {%- elif date_formats == 'YYYY-MM-DD' -%}
  '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
  {%- endif -%}
  {% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections_reference('analyzed_table') }}
    {{- lib.render_time_dimension_projection_reference('analyzed_table') }}
FROM (
    SELECT
        original_table.*
        {{- lib.render_data_grouping_projections('original_table') }}
        {{- lib.render_time_dimension_projection('original_table') }}
    FROM {{ lib.render_target_table() }} original_table
    {{- lib.render_where_clause(table_alias_prefix='original_table') }}
) analyzed_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    time_period,
    time_period_utc
FROM (
    SELECT
        original_table.*,
    TRUNC(CAST(CURRENT_TIMESTAMP AS DATE), 'MONTH') AS time_period,
    CAST(TRUNC(CAST(CURRENT_TIMESTAMP AS DATE), 'MONTH') AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
    FROM "<target_schema>"."<target_table>" original_table
) analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

PostgreSQL

{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])-(0[1-9]|1[0-2])-(\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1]).(0[1-9]|1[0-2]).(\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST({{lib.render_target_column('analyzed_table')}} AS VARCHAR) ~ {{render_date_formats(parameters.date_formats)}} IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST(analyzed_table."target_column" AS VARCHAR) ~ '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$' IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date)) AS time_period,
    CAST((DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date))) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_postgresql_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Redshift

{% import '/dialects/redshift.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{lib.render_target_column('analyzed_table')}} ~ {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table."target_column" ~ '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date)) AS time_period,
    CAST((DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date))) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_redshift_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Snowflake

{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_TRUNC('MONTH', CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date)) AS time_period,
    TO_TIMESTAMP(DATE_TRUNC('MONTH', CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date))) AS time_period_utc
FROM "your_snowflake_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

SQL Server

{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])-(0[1-9]|1[0-2])-([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])\.(0[1-9]|1[0-2])\.([1-9][0-9]{3})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9][0-9]{3})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro %}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{ lib.render_target_column('analyzed_table') }} LIKE {{render_date_formats(parameters.date_formats)}} ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.[target_column]) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table.[target_column] LIKE '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$' ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATEADD(month, DATEDIFF(month, 0, SYSDATETIMEOFFSET()), 0) AS time_period,
    CAST((DATEADD(month, DATEDIFF(month, 0, SYSDATETIMEOFFSET()), 0)) AS DATETIME) AS time_period_utc
FROM [your_sql_server_database].[<target_schema>].[<target_table>] AS analyzed_table

Configuration with data grouping

Click to see more

Sample configuration (Yaml)

# yaml-language-server: $schema=https://cloud.dqo.ai/dqo-yaml-schema/TableYaml-schema.json
apiVersion: dqo/v1
kind: table
spec:
  timestamp_columns:
    event_timestamp_column: col_event_timestamp
    ingestion_timestamp_column: col_inserted_at
  incremental_time_window:
    daily_partitioning_recent_days: 7
    monthly_partitioning_recent_months: 1
  default_grouping_name: group_by_country_and_state
  groupings:
    group_by_country_and_state:
      level_1:
        source: column_value
        column: country
      level_2:
        source: column_value
        column: state
  columns:
    target_column:
      profiling_checks:
        datatype:
          profile_date_match_format_percent:
            parameters:
              date_formats: DD/MM/YYYY
            warning:
              min_percent: 100.0
            error:
              min_percent: 99.0
            fatal:
              min_percent: 95.0
      labels:
      - This is the column that is analyzed for data quality issues
    col_event_timestamp:
      labels:
      - optional column that stores the timestamp when the event/transaction happened
    col_inserted_at:
      labels:
      - optional column that stores the timestamp when row was ingested
    country:
      labels:
      - column used as the first grouping key
    state:
      labels:
      - column used as the second grouping key
BigQuery

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$"
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$"
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$"
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        "^(\d{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$"
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST({{ lib.render_target_column('analyzed_table') }} AS STRING), r{{render_date_formats(parameters.date_formats)}}) IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST(analyzed_table.`target_column` AS STRING), r"^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$") IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.`country` AS grouping_level_1,
    analyzed_table.`state` AS grouping_level_2,
    DATE_TRUNC(CAST(CURRENT_TIMESTAMP() AS DATE), MONTH) AS time_period,
    TIMESTAMP(DATE_TRUNC(CAST(CURRENT_TIMESTAMP() AS DATE), MONTH)) AS time_period_utc
FROM `your-google-project-id`.`<target_schema>`.`<target_table>` AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

MySQL

{% import '/dialects/mysql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table.`target_column`, '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.`country` AS grouping_level_1,
    analyzed_table.`state` AS grouping_level_2,
    DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-01 00:00:00') AS time_period,
    FROM_UNIXTIME(UNIX_TIMESTAMP(DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-01 00:00:00'))) AS time_period_utc
FROM `<target_table>` AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Oracle

{% import '/dialects/oracle.sql.jinja2' as lib with context -%}

  {% macro render_date_formats(date_formats) %}
  {%- if date_formats == 'DD/MM/YYYY'-%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
  {%- elif date_formats == 'DD-MM-YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
  {%- elif date_formats == 'DD.MM.YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
  {%- elif date_formats == 'YYYY-MM-DD' -%}
  '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
  {%- endif -%}
  {% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections_reference('analyzed_table') }}
    {{- lib.render_time_dimension_projection_reference('analyzed_table') }}
FROM (
    SELECT
        original_table.*
        {{- lib.render_data_grouping_projections('original_table') }}
        {{- lib.render_time_dimension_projection('original_table') }}
    FROM {{ lib.render_target_table() }} original_table
    {{- lib.render_where_clause(table_alias_prefix='original_table') }}
) analyzed_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,

                analyzed_table.grouping_level_1,

                analyzed_table.grouping_level_2
,
    time_period,
    time_period_utc
FROM (
    SELECT
        original_table.*,
    original_table."country" AS grouping_level_1,
    original_table."state" AS grouping_level_2,
    TRUNC(CAST(CURRENT_TIMESTAMP AS DATE), 'MONTH') AS time_period,
    CAST(TRUNC(CAST(CURRENT_TIMESTAMP AS DATE), 'MONTH') AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
    FROM "<target_schema>"."<target_table>" original_table
) analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

PostgreSQL

{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])-(0[1-9]|1[0-2])-(\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1]).(0[1-9]|1[0-2]).(\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST({{lib.render_target_column('analyzed_table')}} AS VARCHAR) ~ {{render_date_formats(parameters.date_formats)}} IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST(analyzed_table."target_column" AS VARCHAR) ~ '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$' IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date)) AS time_period,
    CAST((DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date))) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_postgresql_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Redshift

{% import '/dialects/redshift.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{lib.render_target_column('analyzed_table')}} ~ {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table."target_column" ~ '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date)) AS time_period,
    CAST((DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date))) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_redshift_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Snowflake

{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    DATE_TRUNC('MONTH', CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date)) AS time_period,
    TO_TIMESTAMP(DATE_TRUNC('MONTH', CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date))) AS time_period_utc
FROM "your_snowflake_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

SQL Server

{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])-(0[1-9]|1[0-2])-([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])\.(0[1-9]|1[0-2])\.([1-9][0-9]{3})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9][0-9]{3})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro %}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{ lib.render_target_column('analyzed_table') }} LIKE {{render_date_formats(parameters.date_formats)}} ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.[target_column]) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table.[target_column] LIKE '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$' ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.[country] AS grouping_level_1,
    analyzed_table.[state] AS grouping_level_2,
    DATEADD(month, DATEDIFF(month, 0, SYSDATETIMEOFFSET()), 0) AS time_period,
    CAST((DATEADD(month, DATEDIFF(month, 0, SYSDATETIMEOFFSET()), 0)) AS DATETIME) AS time_period_utc
FROM [your_sql_server_database].[<target_schema>].[<target_table>] AS analyzed_table
GROUP BY analyzed_table.[country], analyzed_table.[state]
ORDER BY level_1, level_2
        , 

daily date match format percent

Check description
Verifies that the percentage of date values matching the given format in a column does not exceed the maximum accepted percentage. Creates a separate data quality check (and an alert) for each daily recurring.

Check name Check type Time scale Sensor definition Quality rule
daily_date_match_format_percent recurring daily date_match_format_percent min_percent

Enable check (Shell)
To enable this check provide connection name and check name in check enable command

dqo> check enable -c=connection_name -ch=daily_date_match_format_percent
Run check (Shell)
To run this check provide check name in check run command
dqo> check run -ch=daily_date_match_format_percent
It is also possible to run this check on a specific connection. In order to do this, add the connection name to the below
dqo> check run -c=connection_name -ch=daily_date_match_format_percent
It is additionally feasible to run this check on a specific table. In order to do this, add the table name to the below
dqo> check run -c=connection_name -t=table_name -ch=daily_date_match_format_percent
It is furthermore viable to combine run this check on a specific column. In order to do this, add the column name to the below
dqo> check run -c=connection_name -t=table_name -col=column_name -ch=daily_date_match_format_percent
Check structure (Yaml)
      recurring_checks:
        daily:
          datatype:
            daily_date_match_format_percent:
              parameters:
                date_formats: DD/MM/YYYY
              warning:
                min_percent: 100.0
              error:
                min_percent: 99.0
              fatal:
                min_percent: 95.0
Sample configuration (Yaml)
# yaml-language-server: $schema=https://cloud.dqo.ai/dqo-yaml-schema/TableYaml-schema.json
apiVersion: dqo/v1
kind: table
spec:
  timestamp_columns:
    event_timestamp_column: col_event_timestamp
    ingestion_timestamp_column: col_inserted_at
  incremental_time_window:
    daily_partitioning_recent_days: 7
    monthly_partitioning_recent_months: 1
  columns:
    target_column:
      recurring_checks:
        daily:
          datatype:
            daily_date_match_format_percent:
              parameters:
                date_formats: DD/MM/YYYY
              warning:
                min_percent: 100.0
              error:
                min_percent: 99.0
              fatal:
                min_percent: 95.0
      labels:
      - This is the column that is analyzed for data quality issues
    col_event_timestamp:
      labels:
      - optional column that stores the timestamp when the event/transaction happened
    col_inserted_at:
      labels:
      - optional column that stores the timestamp when row was ingested

BigQuery

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$"
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$"
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$"
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        "^(\d{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$"
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST({{ lib.render_target_column('analyzed_table') }} AS STRING), r{{render_date_formats(parameters.date_formats)}}) IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST(analyzed_table.`target_column` AS STRING), r"^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$") IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    CAST(CURRENT_TIMESTAMP() AS DATE) AS time_period,
    TIMESTAMP(CAST(CURRENT_TIMESTAMP() AS DATE)) AS time_period_utc
FROM `your-google-project-id`.`<target_schema>`.`<target_table>` AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

MySQL

{% import '/dialects/mysql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table.`target_column`, '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-%d 00:00:00') AS time_period,
    FROM_UNIXTIME(UNIX_TIMESTAMP(DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-%d 00:00:00'))) AS time_period_utc
FROM `<target_table>` AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Oracle

{% import '/dialects/oracle.sql.jinja2' as lib with context -%}

  {% macro render_date_formats(date_formats) %}
  {%- if date_formats == 'DD/MM/YYYY'-%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
  {%- elif date_formats == 'DD-MM-YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
  {%- elif date_formats == 'DD.MM.YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
  {%- elif date_formats == 'YYYY-MM-DD' -%}
  '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
  {%- endif -%}
  {% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections_reference('analyzed_table') }}
    {{- lib.render_time_dimension_projection_reference('analyzed_table') }}
FROM (
    SELECT
        original_table.*
        {{- lib.render_data_grouping_projections('original_table') }}
        {{- lib.render_time_dimension_projection('original_table') }}
    FROM {{ lib.render_target_table() }} original_table
    {{- lib.render_where_clause(table_alias_prefix='original_table') }}
) analyzed_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    time_period,
    time_period_utc
FROM (
    SELECT
        original_table.*,
    TRUNC(CAST(CURRENT_TIMESTAMP AS DATE)) AS time_period,
    CAST(TRUNC(CAST(CURRENT_TIMESTAMP AS DATE)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
    FROM "<target_schema>"."<target_table>" original_table
) analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

PostgreSQL

{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])-(0[1-9]|1[0-2])-(\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1]).(0[1-9]|1[0-2]).(\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST({{lib.render_target_column('analyzed_table')}} AS VARCHAR) ~ {{render_date_formats(parameters.date_formats)}} IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST(analyzed_table."target_column" AS VARCHAR) ~ '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$' IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    CAST(LOCALTIMESTAMP AS date) AS time_period,
    CAST((CAST(LOCALTIMESTAMP AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_postgresql_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Redshift

{% import '/dialects/redshift.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{lib.render_target_column('analyzed_table')}} ~ {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table."target_column" ~ '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    CAST(LOCALTIMESTAMP AS date) AS time_period,
    CAST((CAST(LOCALTIMESTAMP AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_redshift_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Snowflake

{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date) AS time_period,
    TO_TIMESTAMP(CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date)) AS time_period_utc
FROM "your_snowflake_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

SQL Server

{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])-(0[1-9]|1[0-2])-([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])\.(0[1-9]|1[0-2])\.([1-9][0-9]{3})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9][0-9]{3})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro %}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{ lib.render_target_column('analyzed_table') }} LIKE {{render_date_formats(parameters.date_formats)}} ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.[target_column]) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table.[target_column] LIKE '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$' ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    CAST(SYSDATETIMEOFFSET() AS date) AS time_period,
    CAST((CAST(SYSDATETIMEOFFSET() AS date)) AS DATETIME) AS time_period_utc
FROM [your_sql_server_database].[<target_schema>].[<target_table>] AS analyzed_table

Configuration with data grouping

Click to see more

Sample configuration (Yaml)

# yaml-language-server: $schema=https://cloud.dqo.ai/dqo-yaml-schema/TableYaml-schema.json
apiVersion: dqo/v1
kind: table
spec:
  timestamp_columns:
    event_timestamp_column: col_event_timestamp
    ingestion_timestamp_column: col_inserted_at
  incremental_time_window:
    daily_partitioning_recent_days: 7
    monthly_partitioning_recent_months: 1
  default_grouping_name: group_by_country_and_state
  groupings:
    group_by_country_and_state:
      level_1:
        source: column_value
        column: country
      level_2:
        source: column_value
        column: state
  columns:
    target_column:
      recurring_checks:
        daily:
          datatype:
            daily_date_match_format_percent:
              parameters:
                date_formats: DD/MM/YYYY
              warning:
                min_percent: 100.0
              error:
                min_percent: 99.0
              fatal:
                min_percent: 95.0
      labels:
      - This is the column that is analyzed for data quality issues
    col_event_timestamp:
      labels:
      - optional column that stores the timestamp when the event/transaction happened
    col_inserted_at:
      labels:
      - optional column that stores the timestamp when row was ingested
    country:
      labels:
      - column used as the first grouping key
    state:
      labels:
      - column used as the second grouping key
BigQuery

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$"
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$"
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$"
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        "^(\d{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$"
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST({{ lib.render_target_column('analyzed_table') }} AS STRING), r{{render_date_formats(parameters.date_formats)}}) IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST(analyzed_table.`target_column` AS STRING), r"^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$") IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.`country` AS grouping_level_1,
    analyzed_table.`state` AS grouping_level_2,
    CAST(CURRENT_TIMESTAMP() AS DATE) AS time_period,
    TIMESTAMP(CAST(CURRENT_TIMESTAMP() AS DATE)) AS time_period_utc
FROM `your-google-project-id`.`<target_schema>`.`<target_table>` AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

MySQL

{% import '/dialects/mysql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table.`target_column`, '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.`country` AS grouping_level_1,
    analyzed_table.`state` AS grouping_level_2,
    DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-%d 00:00:00') AS time_period,
    FROM_UNIXTIME(UNIX_TIMESTAMP(DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-%d 00:00:00'))) AS time_period_utc
FROM `<target_table>` AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Oracle

{% import '/dialects/oracle.sql.jinja2' as lib with context -%}

  {% macro render_date_formats(date_formats) %}
  {%- if date_formats == 'DD/MM/YYYY'-%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
  {%- elif date_formats == 'DD-MM-YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
  {%- elif date_formats == 'DD.MM.YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
  {%- elif date_formats == 'YYYY-MM-DD' -%}
  '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
  {%- endif -%}
  {% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections_reference('analyzed_table') }}
    {{- lib.render_time_dimension_projection_reference('analyzed_table') }}
FROM (
    SELECT
        original_table.*
        {{- lib.render_data_grouping_projections('original_table') }}
        {{- lib.render_time_dimension_projection('original_table') }}
    FROM {{ lib.render_target_table() }} original_table
    {{- lib.render_where_clause(table_alias_prefix='original_table') }}
) analyzed_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,

                analyzed_table.grouping_level_1,

                analyzed_table.grouping_level_2
,
    time_period,
    time_period_utc
FROM (
    SELECT
        original_table.*,
    original_table."country" AS grouping_level_1,
    original_table."state" AS grouping_level_2,
    TRUNC(CAST(CURRENT_TIMESTAMP AS DATE)) AS time_period,
    CAST(TRUNC(CAST(CURRENT_TIMESTAMP AS DATE)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
    FROM "<target_schema>"."<target_table>" original_table
) analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

PostgreSQL

{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])-(0[1-9]|1[0-2])-(\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1]).(0[1-9]|1[0-2]).(\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST({{lib.render_target_column('analyzed_table')}} AS VARCHAR) ~ {{render_date_formats(parameters.date_formats)}} IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST(analyzed_table."target_column" AS VARCHAR) ~ '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$' IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    CAST(LOCALTIMESTAMP AS date) AS time_period,
    CAST((CAST(LOCALTIMESTAMP AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_postgresql_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Redshift

{% import '/dialects/redshift.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{lib.render_target_column('analyzed_table')}} ~ {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table."target_column" ~ '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    CAST(LOCALTIMESTAMP AS date) AS time_period,
    CAST((CAST(LOCALTIMESTAMP AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_redshift_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Snowflake

{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date) AS time_period,
    TO_TIMESTAMP(CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date)) AS time_period_utc
FROM "your_snowflake_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

SQL Server

{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])-(0[1-9]|1[0-2])-([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])\.(0[1-9]|1[0-2])\.([1-9][0-9]{3})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9][0-9]{3})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro %}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{ lib.render_target_column('analyzed_table') }} LIKE {{render_date_formats(parameters.date_formats)}} ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.[target_column]) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table.[target_column] LIKE '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$' ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.[country] AS grouping_level_1,
    analyzed_table.[state] AS grouping_level_2,
    CAST(SYSDATETIMEOFFSET() AS date) AS time_period,
    CAST((CAST(SYSDATETIMEOFFSET() AS date)) AS DATETIME) AS time_period_utc
FROM [your_sql_server_database].[<target_schema>].[<target_table>] AS analyzed_table
GROUP BY analyzed_table.[country], analyzed_table.[state]
ORDER BY level_1, level_2
        , 

monthly date match format percent

Check description
Verifies that the percentage of date values matching the given format in a column does not exceed the maximum accepted percentage. Creates a separate data quality check (and an alert) for each monthly recurring.

Check name Check type Time scale Sensor definition Quality rule
monthly_date_match_format_percent recurring monthly date_match_format_percent min_percent

Enable check (Shell)
To enable this check provide connection name and check name in check enable command

dqo> check enable -c=connection_name -ch=monthly_date_match_format_percent
Run check (Shell)
To run this check provide check name in check run command
dqo> check run -ch=monthly_date_match_format_percent
It is also possible to run this check on a specific connection. In order to do this, add the connection name to the below
dqo> check run -c=connection_name -ch=monthly_date_match_format_percent
It is additionally feasible to run this check on a specific table. In order to do this, add the table name to the below
dqo> check run -c=connection_name -t=table_name -ch=monthly_date_match_format_percent
It is furthermore viable to combine run this check on a specific column. In order to do this, add the column name to the below
dqo> check run -c=connection_name -t=table_name -col=column_name -ch=monthly_date_match_format_percent
Check structure (Yaml)
      recurring_checks:
        monthly:
          datatype:
            monthly_date_match_format_percent:
              parameters:
                date_formats: DD/MM/YYYY
              warning:
                min_percent: 100.0
              error:
                min_percent: 99.0
              fatal:
                min_percent: 95.0
Sample configuration (Yaml)
# yaml-language-server: $schema=https://cloud.dqo.ai/dqo-yaml-schema/TableYaml-schema.json
apiVersion: dqo/v1
kind: table
spec:
  timestamp_columns:
    event_timestamp_column: col_event_timestamp
    ingestion_timestamp_column: col_inserted_at
  incremental_time_window:
    daily_partitioning_recent_days: 7
    monthly_partitioning_recent_months: 1
  columns:
    target_column:
      recurring_checks:
        monthly:
          datatype:
            monthly_date_match_format_percent:
              parameters:
                date_formats: DD/MM/YYYY
              warning:
                min_percent: 100.0
              error:
                min_percent: 99.0
              fatal:
                min_percent: 95.0
      labels:
      - This is the column that is analyzed for data quality issues
    col_event_timestamp:
      labels:
      - optional column that stores the timestamp when the event/transaction happened
    col_inserted_at:
      labels:
      - optional column that stores the timestamp when row was ingested

BigQuery

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$"
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$"
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$"
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        "^(\d{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$"
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST({{ lib.render_target_column('analyzed_table') }} AS STRING), r{{render_date_formats(parameters.date_formats)}}) IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST(analyzed_table.`target_column` AS STRING), r"^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$") IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_TRUNC(CAST(CURRENT_TIMESTAMP() AS DATE), MONTH) AS time_period,
    TIMESTAMP(DATE_TRUNC(CAST(CURRENT_TIMESTAMP() AS DATE), MONTH)) AS time_period_utc
FROM `your-google-project-id`.`<target_schema>`.`<target_table>` AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

MySQL

{% import '/dialects/mysql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table.`target_column`, '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-01 00:00:00') AS time_period,
    FROM_UNIXTIME(UNIX_TIMESTAMP(DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-01 00:00:00'))) AS time_period_utc
FROM `<target_table>` AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Oracle

{% import '/dialects/oracle.sql.jinja2' as lib with context -%}

  {% macro render_date_formats(date_formats) %}
  {%- if date_formats == 'DD/MM/YYYY'-%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
  {%- elif date_formats == 'DD-MM-YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
  {%- elif date_formats == 'DD.MM.YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
  {%- elif date_formats == 'YYYY-MM-DD' -%}
  '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
  {%- endif -%}
  {% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections_reference('analyzed_table') }}
    {{- lib.render_time_dimension_projection_reference('analyzed_table') }}
FROM (
    SELECT
        original_table.*
        {{- lib.render_data_grouping_projections('original_table') }}
        {{- lib.render_time_dimension_projection('original_table') }}
    FROM {{ lib.render_target_table() }} original_table
    {{- lib.render_where_clause(table_alias_prefix='original_table') }}
) analyzed_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    time_period,
    time_period_utc
FROM (
    SELECT
        original_table.*,
    TRUNC(CAST(CURRENT_TIMESTAMP AS DATE), 'MONTH') AS time_period,
    CAST(TRUNC(CAST(CURRENT_TIMESTAMP AS DATE), 'MONTH') AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
    FROM "<target_schema>"."<target_table>" original_table
) analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

PostgreSQL

{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])-(0[1-9]|1[0-2])-(\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1]).(0[1-9]|1[0-2]).(\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST({{lib.render_target_column('analyzed_table')}} AS VARCHAR) ~ {{render_date_formats(parameters.date_formats)}} IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST(analyzed_table."target_column" AS VARCHAR) ~ '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$' IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date)) AS time_period,
    CAST((DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date))) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_postgresql_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Redshift

{% import '/dialects/redshift.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{lib.render_target_column('analyzed_table')}} ~ {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table."target_column" ~ '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date)) AS time_period,
    CAST((DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date))) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_redshift_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Snowflake

{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_TRUNC('MONTH', CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date)) AS time_period,
    TO_TIMESTAMP(DATE_TRUNC('MONTH', CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date))) AS time_period_utc
FROM "your_snowflake_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

SQL Server

{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])-(0[1-9]|1[0-2])-([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])\.(0[1-9]|1[0-2])\.([1-9][0-9]{3})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9][0-9]{3})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro %}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{ lib.render_target_column('analyzed_table') }} LIKE {{render_date_formats(parameters.date_formats)}} ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.[target_column]) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table.[target_column] LIKE '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$' ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATEADD(month, DATEDIFF(month, 0, SYSDATETIMEOFFSET()), 0) AS time_period,
    CAST((DATEADD(month, DATEDIFF(month, 0, SYSDATETIMEOFFSET()), 0)) AS DATETIME) AS time_period_utc
FROM [your_sql_server_database].[<target_schema>].[<target_table>] AS analyzed_table

Configuration with data grouping

Click to see more

Sample configuration (Yaml)

# yaml-language-server: $schema=https://cloud.dqo.ai/dqo-yaml-schema/TableYaml-schema.json
apiVersion: dqo/v1
kind: table
spec:
  timestamp_columns:
    event_timestamp_column: col_event_timestamp
    ingestion_timestamp_column: col_inserted_at
  incremental_time_window:
    daily_partitioning_recent_days: 7
    monthly_partitioning_recent_months: 1
  default_grouping_name: group_by_country_and_state
  groupings:
    group_by_country_and_state:
      level_1:
        source: column_value
        column: country
      level_2:
        source: column_value
        column: state
  columns:
    target_column:
      recurring_checks:
        monthly:
          datatype:
            monthly_date_match_format_percent:
              parameters:
                date_formats: DD/MM/YYYY
              warning:
                min_percent: 100.0
              error:
                min_percent: 99.0
              fatal:
                min_percent: 95.0
      labels:
      - This is the column that is analyzed for data quality issues
    col_event_timestamp:
      labels:
      - optional column that stores the timestamp when the event/transaction happened
    col_inserted_at:
      labels:
      - optional column that stores the timestamp when row was ingested
    country:
      labels:
      - column used as the first grouping key
    state:
      labels:
      - column used as the second grouping key
BigQuery

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$"
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$"
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$"
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        "^(\d{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$"
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST({{ lib.render_target_column('analyzed_table') }} AS STRING), r{{render_date_formats(parameters.date_formats)}}) IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST(analyzed_table.`target_column` AS STRING), r"^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$") IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.`country` AS grouping_level_1,
    analyzed_table.`state` AS grouping_level_2,
    DATE_TRUNC(CAST(CURRENT_TIMESTAMP() AS DATE), MONTH) AS time_period,
    TIMESTAMP(DATE_TRUNC(CAST(CURRENT_TIMESTAMP() AS DATE), MONTH)) AS time_period_utc
FROM `your-google-project-id`.`<target_schema>`.`<target_table>` AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

MySQL

{% import '/dialects/mysql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table.`target_column`, '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.`country` AS grouping_level_1,
    analyzed_table.`state` AS grouping_level_2,
    DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-01 00:00:00') AS time_period,
    FROM_UNIXTIME(UNIX_TIMESTAMP(DATE_FORMAT(LOCALTIMESTAMP, '%Y-%m-01 00:00:00'))) AS time_period_utc
FROM `<target_table>` AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Oracle

{% import '/dialects/oracle.sql.jinja2' as lib with context -%}

  {% macro render_date_formats(date_formats) %}
  {%- if date_formats == 'DD/MM/YYYY'-%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
  {%- elif date_formats == 'DD-MM-YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
  {%- elif date_formats == 'DD.MM.YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
  {%- elif date_formats == 'YYYY-MM-DD' -%}
  '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
  {%- endif -%}
  {% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections_reference('analyzed_table') }}
    {{- lib.render_time_dimension_projection_reference('analyzed_table') }}
FROM (
    SELECT
        original_table.*
        {{- lib.render_data_grouping_projections('original_table') }}
        {{- lib.render_time_dimension_projection('original_table') }}
    FROM {{ lib.render_target_table() }} original_table
    {{- lib.render_where_clause(table_alias_prefix='original_table') }}
) analyzed_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,

                analyzed_table.grouping_level_1,

                analyzed_table.grouping_level_2
,
    time_period,
    time_period_utc
FROM (
    SELECT
        original_table.*,
    original_table."country" AS grouping_level_1,
    original_table."state" AS grouping_level_2,
    TRUNC(CAST(CURRENT_TIMESTAMP AS DATE), 'MONTH') AS time_period,
    CAST(TRUNC(CAST(CURRENT_TIMESTAMP AS DATE), 'MONTH') AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
    FROM "<target_schema>"."<target_table>" original_table
) analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

PostgreSQL

{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])-(0[1-9]|1[0-2])-(\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1]).(0[1-9]|1[0-2]).(\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST({{lib.render_target_column('analyzed_table')}} AS VARCHAR) ~ {{render_date_formats(parameters.date_formats)}} IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST(analyzed_table."target_column" AS VARCHAR) ~ '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$' IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date)) AS time_period,
    CAST((DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date))) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_postgresql_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Redshift

{% import '/dialects/redshift.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{lib.render_target_column('analyzed_table')}} ~ {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table."target_column" ~ '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date)) AS time_period,
    CAST((DATE_TRUNC('MONTH', CAST(LOCALTIMESTAMP AS date))) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_redshift_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Snowflake

{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    DATE_TRUNC('MONTH', CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date)) AS time_period,
    TO_TIMESTAMP(DATE_TRUNC('MONTH', CAST(TO_TIMESTAMP_NTZ(LOCALTIMESTAMP()) AS date))) AS time_period_utc
FROM "your_snowflake_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

SQL Server

{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])-(0[1-9]|1[0-2])-([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])\.(0[1-9]|1[0-2])\.([1-9][0-9]{3})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9][0-9]{3})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro %}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{ lib.render_target_column('analyzed_table') }} LIKE {{render_date_formats(parameters.date_formats)}} ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.[target_column]) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table.[target_column] LIKE '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$' ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.[country] AS grouping_level_1,
    analyzed_table.[state] AS grouping_level_2,
    DATEADD(month, DATEDIFF(month, 0, SYSDATETIMEOFFSET()), 0) AS time_period,
    CAST((DATEADD(month, DATEDIFF(month, 0, SYSDATETIMEOFFSET()), 0)) AS DATETIME) AS time_period_utc
FROM [your_sql_server_database].[<target_schema>].[<target_table>] AS analyzed_table
GROUP BY analyzed_table.[country], analyzed_table.[state]
ORDER BY level_1, level_2
        , 

daily partition date match format percent

Check description
Verifies that the percentage of date values matching the given format in a column does not exceed the maximum accepted percentage. Creates a separate data quality check (and an alert) for each daily partition.

Check name Check type Time scale Sensor definition Quality rule
daily_partition_date_match_format_percent partitioned daily date_match_format_percent min_percent

Enable check (Shell)
To enable this check provide connection name and check name in check enable command

dqo> check enable -c=connection_name -ch=daily_partition_date_match_format_percent
Run check (Shell)
To run this check provide check name in check run command
dqo> check run -ch=daily_partition_date_match_format_percent
It is also possible to run this check on a specific connection. In order to do this, add the connection name to the below
dqo> check run -c=connection_name -ch=daily_partition_date_match_format_percent
It is additionally feasible to run this check on a specific table. In order to do this, add the table name to the below
dqo> check run -c=connection_name -t=table_name -ch=daily_partition_date_match_format_percent
It is furthermore viable to combine run this check on a specific column. In order to do this, add the column name to the below
dqo> check run -c=connection_name -t=table_name -col=column_name -ch=daily_partition_date_match_format_percent
Check structure (Yaml)
      partitioned_checks:
        daily:
          datatype:
            daily_partition_date_match_format_percent:
              parameters:
                date_formats: DD/MM/YYYY
              warning:
                min_percent: 100.0
              error:
                min_percent: 99.0
              fatal:
                min_percent: 95.0
Sample configuration (Yaml)
# yaml-language-server: $schema=https://cloud.dqo.ai/dqo-yaml-schema/TableYaml-schema.json
apiVersion: dqo/v1
kind: table
spec:
  timestamp_columns:
    event_timestamp_column: col_event_timestamp
    ingestion_timestamp_column: col_inserted_at
  incremental_time_window:
    daily_partitioning_recent_days: 7
    monthly_partitioning_recent_months: 1
  columns:
    target_column:
      partitioned_checks:
        daily:
          datatype:
            daily_partition_date_match_format_percent:
              parameters:
                date_formats: DD/MM/YYYY
              warning:
                min_percent: 100.0
              error:
                min_percent: 99.0
              fatal:
                min_percent: 95.0
      labels:
      - This is the column that is analyzed for data quality issues
    col_event_timestamp:
      labels:
      - optional column that stores the timestamp when the event/transaction happened
    col_inserted_at:
      labels:
      - optional column that stores the timestamp when row was ingested

BigQuery

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$"
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$"
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$"
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        "^(\d{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$"
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST({{ lib.render_target_column('analyzed_table') }} AS STRING), r{{render_date_formats(parameters.date_formats)}}) IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST(analyzed_table.`target_column` AS STRING), r"^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$") IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    CAST(analyzed_table.`` AS DATE) AS time_period,
    TIMESTAMP(CAST(analyzed_table.`` AS DATE)) AS time_period_utc
FROM `your-google-project-id`.`<target_schema>`.`<target_table>` AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

MySQL

{% import '/dialects/mysql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table.`target_column`, '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    DATE_FORMAT(analyzed_table.``, '%Y-%m-%d 00:00:00') AS time_period,
    FROM_UNIXTIME(UNIX_TIMESTAMP(DATE_FORMAT(analyzed_table.``, '%Y-%m-%d 00:00:00'))) AS time_period_utc
FROM `<target_table>` AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Oracle

{% import '/dialects/oracle.sql.jinja2' as lib with context -%}

  {% macro render_date_formats(date_formats) %}
  {%- if date_formats == 'DD/MM/YYYY'-%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
  {%- elif date_formats == 'DD-MM-YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
  {%- elif date_formats == 'DD.MM.YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
  {%- elif date_formats == 'YYYY-MM-DD' -%}
  '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
  {%- endif -%}
  {% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections_reference('analyzed_table') }}
    {{- lib.render_time_dimension_projection_reference('analyzed_table') }}
FROM (
    SELECT
        original_table.*
        {{- lib.render_data_grouping_projections('original_table') }}
        {{- lib.render_time_dimension_projection('original_table') }}
    FROM {{ lib.render_target_table() }} original_table
    {{- lib.render_where_clause(table_alias_prefix='original_table') }}
) analyzed_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    time_period,
    time_period_utc
FROM (
    SELECT
        original_table.*,
    TRUNC(CAST(original_table."" AS DATE)) AS time_period,
    CAST(TRUNC(CAST(original_table."" AS DATE)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
    FROM "<target_schema>"."<target_table>" original_table
) analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

PostgreSQL

{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])-(0[1-9]|1[0-2])-(\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1]).(0[1-9]|1[0-2]).(\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST({{lib.render_target_column('analyzed_table')}} AS VARCHAR) ~ {{render_date_formats(parameters.date_formats)}} IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST(analyzed_table."target_column" AS VARCHAR) ~ '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$' IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    CAST(analyzed_table."" AS date) AS time_period,
    CAST((CAST(analyzed_table."" AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_postgresql_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Redshift

{% import '/dialects/redshift.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{lib.render_target_column('analyzed_table')}} ~ {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table."target_column" ~ '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    CAST(analyzed_table."" AS date) AS time_period,
    CAST((CAST(analyzed_table."" AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_redshift_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Snowflake

{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$')
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    CAST(analyzed_table."" AS date) AS time_period,
    TO_TIMESTAMP(CAST(analyzed_table."" AS date)) AS time_period_utc
FROM "your_snowflake_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

SQL Server

{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])-(0[1-9]|1[0-2])-([1-9][0-9]{3})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1-2][0-9]|3[0-1])\.(0[1-9]|1[0-2])\.([1-9][0-9]{3})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9][0-9]{3})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro %}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{ lib.render_target_column('analyzed_table') }} LIKE {{render_date_formats(parameters.date_formats)}} ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.[target_column]) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN analyzed_table.[target_column] LIKE '^(0[1-9]|[1-2][0-9]|3[0-1])/(0[1-9]|1[0-2])/([1-9][0-9]{3})$' ESCAPE '~'
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    CAST(analyzed_table.[] AS date) AS time_period,
    CAST((CAST(analyzed_table.[] AS date)) AS DATETIME) AS time_period_utc
FROM [your_sql_server_database].[<target_schema>].[<target_table>] AS analyzed_table
GROUP BY CAST(analyzed_table.[] AS date), CAST(analyzed_table.[] AS date)
ORDER BY CAST(analyzed_table.[] AS date)

Configuration with data grouping

Click to see more

Sample configuration (Yaml)

# yaml-language-server: $schema=https://cloud.dqo.ai/dqo-yaml-schema/TableYaml-schema.json
apiVersion: dqo/v1
kind: table
spec:
  timestamp_columns:
    event_timestamp_column: col_event_timestamp
    ingestion_timestamp_column: col_inserted_at
  incremental_time_window:
    daily_partitioning_recent_days: 7
    monthly_partitioning_recent_months: 1
  default_grouping_name: group_by_country_and_state
  groupings:
    group_by_country_and_state:
      level_1:
        source: column_value
        column: country
      level_2:
        source: column_value
        column: state
  columns:
    target_column:
      partitioned_checks:
        daily:
          datatype:
            daily_partition_date_match_format_percent:
              parameters:
                date_formats: DD/MM/YYYY
              warning:
                min_percent: 100.0
              error:
                min_percent: 99.0
              fatal:
                min_percent: 95.0
      labels:
      - This is the column that is analyzed for data quality issues
    col_event_timestamp:
      labels:
      - optional column that stores the timestamp when the event/transaction happened
    col_inserted_at:
      labels:
      - optional column that stores the timestamp when row was ingested
    country:
      labels:
      - column used as the first grouping key
    state:
      labels:
      - column used as the second grouping key
BigQuery

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$"
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$"
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        "^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$"
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        "^(\d{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$"
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST({{ lib.render_target_column('analyzed_table') }} AS STRING), r{{render_date_formats(parameters.date_formats)}}) IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN REGEXP_CONTAINS(CAST(analyzed_table.`target_column` AS STRING), r"^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$") IS NOT FALSE
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.`country` AS grouping_level_1,
    analyzed_table.`state` AS grouping_level_2,
    CAST(analyzed_table.`` AS DATE) AS time_period,
    TIMESTAMP(CAST(analyzed_table.`` AS DATE)) AS time_period_utc
FROM `your-google-project-id`.`<target_schema>`.`<target_table>` AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

MySQL

{% import '/dialects/mysql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table.`target_column`) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table.`target_column`, '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table.`country` AS grouping_level_1,
    analyzed_table.`state` AS grouping_level_2,
    DATE_FORMAT(analyzed_table.``, '%Y-%m-%d 00:00:00') AS time_period,
    FROM_UNIXTIME(UNIX_TIMESTAMP(DATE_FORMAT(analyzed_table.``, '%Y-%m-%d 00:00:00'))) AS time_period_utc
FROM `<target_table>` AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Oracle

{% import '/dialects/oracle.sql.jinja2' as lib with context -%}

  {% macro render_date_formats(date_formats) %}
  {%- if date_formats == 'DD/MM/YYYY'-%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$'
  {%- elif date_formats == 'DD-MM-YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-](\d{4})$'
  {%- elif date_formats == 'DD.MM.YYYY' -%}
  '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.](\d{4})$'
  {%- elif date_formats == 'YYYY-MM-DD' -%}
  '^([0-9]{4})[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
  {%- endif -%}
  {% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE({{ lib.render_target_column('analyzed_table') }}, {{render_date_formats(parameters.date_formats)}})
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections_reference('analyzed_table') }}
    {{- lib.render_time_dimension_projection_reference('analyzed_table') }}
FROM (
    SELECT
        original_table.*
        {{- lib.render_data_grouping_projections('original_table') }}
        {{- lib.render_time_dimension_projection('original_table') }}
    FROM {{ lib.render_target_table() }} original_table
    {{- lib.render_where_clause(table_alias_prefix='original_table') }}
) analyzed_table
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(CASE
              WHEN REGEXP_LIKE(analyzed_table."target_column", '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/](\d{4})$')
                 THEN 1
              ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,

                analyzed_table.grouping_level_1,

                analyzed_table.grouping_level_2
,
    time_period,
    time_period_utc
FROM (
    SELECT
        original_table.*,
    original_table."country" AS grouping_level_1,
    original_table."state" AS grouping_level_2,
    TRUNC(CAST(original_table."" AS DATE)) AS time_period,
    CAST(TRUNC(CAST(original_table."" AS DATE)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
    FROM "<target_schema>"."<target_table>" original_table
) analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

PostgreSQL

{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1])-(0[1-9]|1[0-2])-(\d{4})$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^([0][1-9]|[1-2][0-9]|[3][0-1]).(0[1-9]|1[0-2]).(\d{4})$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$'
    {%- endif -%}
{% endmacro -%}

SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST({{lib.render_target_column('analyzed_table')}} AS VARCHAR) ~ {{render_date_formats(parameters.date_formats)}} IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    CASE
        WHEN COUNT(analyzed_table."target_column") = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN CAST(analyzed_table."target_column" AS VARCHAR) ~ '^([0][1-9]|[1-2][0-9]|[3][0-1])/(0[1-9]|1[0-2])/([0-9]{4})$' IS NOT NULL
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    CAST(analyzed_table."" AS date) AS time_period,
    CAST((CAST(analyzed_table."" AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_postgresql_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Redshift

{% import '/dialects/redshift.sql.jinja2' as lib with context -%}

{% macro render_date_formats(date_formats) %}
    {%- if date_formats == 'DD/MM/YYYY'-%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[/](0[1-9]|1[0-2])[/]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD-MM-YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[-](0[1-9]|1[0-2])[-]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'DD.MM.YYYY' -%}
        '^(0[1-9]|[1][0-9]|[2][0-9]|3[01])[.](0[1-9]|1[0-2])[.]([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])$'
    {%- elif date_formats == 'YYYY-MM-DD' -%}
        '^([1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9])[-](0[1-9]|1[0-2])[-](0[1-9]|[1][0-9]|[2][0-9]|3[01])$'
    {%- endif -%}
{% endmacro -%}


SELECT
    CASE
        WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
        ELSE 100.0 * SUM(
            CASE
                WHEN {{lib.render_target_column('analyzed_table')}} ~ {{render_date_formats(parameters.date_formats)}})
                    THEN 1
                ELSE 0
            END
        ) / COUNT(*)
    END AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection(