Skip to content

sum anomaly stationary 30 days

sum anomaly stationary 30 days checks

Description
Column-level check that ensures that the sum of the values in a monitored column is within a two-tailed percentile of the measurements made during the last 30 days. Use it in partitioned checks.


daily partition sum anomaly stationary 30 days

Check description
Verifies that the sum in a column is within a percentile from measurements made during the last 30 days.

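| Check name | Check type | Time scale | Sensor definition | Quality rule |
|------------|------------|------------|-------------------|--------------|
| daily_partition_sum_anomaly_stationary_30_days | partitioned | daily | sum | anomaly_stationary_percentile_moving_average_30_days |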

Enable check (Shell)
To enable this check, provide the connection name and the check name in the check enable command:

dqo> check enable -c=connection_name -ch=daily_partition_sum_anomaly_stationary_30_days
Run check (Shell)
To run this check, provide the check name in the check run command:
dqo> check run -ch=daily_partition_sum_anomaly_stationary_30_days
It is also possible to run this check on a specific connection. To do this, add the connection name to the command below:
dqo> check run -c=connection_name -ch=daily_partition_sum_anomaly_stationary_30_days
It is also possible to run this check on a specific table. To do this, add the table name to the command below:
dqo> check run -c=connection_name -t=table_name -ch=daily_partition_sum_anomaly_stationary_30_days
It is also possible to run this check on a specific column. To do this, add the column name to the command below:
dqo> check run -c=connection_name -t=table_name -col=column_name -ch=daily_partition_sum_anomaly_stationary_30_days
Check structure (Yaml)
      partitioned_checks:
        daily:
          anomaly:
            daily_partition_sum_anomaly_stationary_30_days:
              warning:
                anomaly_percent: 0.1
              error:
                anomaly_percent: 0.1
              fatal:
                anomaly_percent: 0.1
Sample configuration (Yaml)
# yaml-language-server: $schema=https://cloud.dqo.ai/dqo-yaml-schema/TableYaml-schema.json
apiVersion: dqo/v1
kind: table
spec:
  timestamp_columns:
    event_timestamp_column: col_event_timestamp
    ingestion_timestamp_column: col_inserted_at
  incremental_time_window:
    daily_partitioning_recent_days: 7
    monthly_partitioning_recent_months: 1
  columns:
    target_column:
      partitioned_checks:
        daily:
          anomaly:
            daily_partition_sum_anomaly_stationary_30_days:
              warning:
                anomaly_percent: 0.1
              error:
                anomaly_percent: 0.1
              fatal:
                anomaly_percent: 0.1
      labels:
      - This is the column that is analyzed for data quality issues
    col_event_timestamp:
      labels:
      - optional column that stores the timestamp when the event/transaction happened
    col_inserted_at:
      labels:
      - optional column that stores the timestamp when row was ingested

BigQuery

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table.`target_column`) AS actual_value,
    CAST(analyzed_table.`` AS DATE) AS time_period,
    TIMESTAMP(CAST(analyzed_table.`` AS DATE)) AS time_period_utc
FROM `your-google-project-id`.`<target_schema>`.`<target_table>` AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

MySQL

{% import '/dialects/mysql.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table.`target_column`) AS actual_value,
    DATE_FORMAT(analyzed_table.``, '%Y-%m-%d 00:00:00') AS time_period,
    FROM_UNIXTIME(UNIX_TIMESTAMP(DATE_FORMAT(analyzed_table.``, '%Y-%m-%d 00:00:00'))) AS time_period_utc
FROM `<target_table>` AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Oracle

{% import '/dialects/oracle.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections_reference('analyzed_table') }}
     {{- lib.render_time_dimension_projection_reference('analyzed_table') }}
 FROM(
     SELECT
         original_table.*
         {{- lib.render_data_grouping_projections('original_table') }}
         {{- lib.render_time_dimension_projection('original_table') }}
     FROM {{ lib.render_target_table() }} original_table
     {{- lib.render_where_clause(table_alias_prefix='original_table') }}
 ) analyzed_table
 {{- lib.render_group_by() -}}
 {{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table."target_column") AS actual_value,
    time_period,
    time_period_utc
 FROM(
     SELECT
         original_table.*,
    TRUNC(CAST(original_table."" AS DATE)) AS time_period,
    CAST(TRUNC(CAST(original_table."" AS DATE)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
     FROM "<target_schema>"."<target_table>" original_table
 ) analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

PostgreSQL

{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table."target_column") AS actual_value,
    CAST(analyzed_table."" AS date) AS time_period,
    CAST((CAST(analyzed_table."" AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_postgresql_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Redshift

{% import '/dialects/redshift.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table."target_column") AS actual_value,
    CAST(analyzed_table."" AS date) AS time_period,
    CAST((CAST(analyzed_table."" AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_redshift_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

Snowflake

{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table."target_column") AS actual_value,
    CAST(analyzed_table."" AS date) AS time_period,
    TO_TIMESTAMP(CAST(analyzed_table."" AS date)) AS time_period_utc
FROM "your_snowflake_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY time_period, time_period_utc
ORDER BY time_period, time_period_utc

SQL Server

{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table.[target_column]) AS actual_value,
    CAST(analyzed_table.[] AS date) AS time_period,
    CAST((CAST(analyzed_table.[] AS date)) AS DATETIME) AS time_period_utc
FROM [your_sql_server_database].[<target_schema>].[<target_table>] AS analyzed_table
GROUP BY CAST(analyzed_table.[] AS date), CAST(analyzed_table.[] AS date)
ORDER BY CAST(analyzed_table.[] AS date)

Configuration with data grouping

Click to see more

Sample configuration (Yaml)

# yaml-language-server: $schema=https://cloud.dqo.ai/dqo-yaml-schema/TableYaml-schema.json
apiVersion: dqo/v1
kind: table
spec:
  timestamp_columns:
    event_timestamp_column: col_event_timestamp
    ingestion_timestamp_column: col_inserted_at
  incremental_time_window:
    daily_partitioning_recent_days: 7
    monthly_partitioning_recent_months: 1
  default_grouping_name: group_by_country_and_state
  groupings:
    group_by_country_and_state:
      level_1:
        source: column_value
        column: country
      level_2:
        source: column_value
        column: state
  columns:
    target_column:
      partitioned_checks:
        daily:
          anomaly:
            daily_partition_sum_anomaly_stationary_30_days:
              warning:
                anomaly_percent: 0.1
              error:
                anomaly_percent: 0.1
              fatal:
                anomaly_percent: 0.1
      labels:
      - This is the column that is analyzed for data quality issues
    col_event_timestamp:
      labels:
      - optional column that stores the timestamp when the event/transaction happened
    col_inserted_at:
      labels:
      - optional column that stores the timestamp when row was ingested
    country:
      labels:
      - column used as the first grouping key
    state:
      labels:
      - column used as the second grouping key
BigQuery

{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table.`target_column`) AS actual_value,
    analyzed_table.`country` AS grouping_level_1,
    analyzed_table.`state` AS grouping_level_2,
    CAST(analyzed_table.`` AS DATE) AS time_period,
    TIMESTAMP(CAST(analyzed_table.`` AS DATE)) AS time_period_utc
FROM `your-google-project-id`.`<target_schema>`.`<target_table>` AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

MySQL

{% import '/dialects/mysql.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table.`target_column`) AS actual_value,
    analyzed_table.`country` AS grouping_level_1,
    analyzed_table.`state` AS grouping_level_2,
    DATE_FORMAT(analyzed_table.``, '%Y-%m-%d 00:00:00') AS time_period,
    FROM_UNIXTIME(UNIX_TIMESTAMP(DATE_FORMAT(analyzed_table.``, '%Y-%m-%d 00:00:00'))) AS time_period_utc
FROM `<target_table>` AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Oracle

{% import '/dialects/oracle.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections_reference('analyzed_table') }}
     {{- lib.render_time_dimension_projection_reference('analyzed_table') }}
 FROM(
     SELECT
         original_table.*
         {{- lib.render_data_grouping_projections('original_table') }}
         {{- lib.render_time_dimension_projection('original_table') }}
     FROM {{ lib.render_target_table() }} original_table
     {{- lib.render_where_clause(table_alias_prefix='original_table') }}
 ) analyzed_table
 {{- lib.render_group_by() -}}
 {{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table."target_column") AS actual_value,
    analyzed_table.grouping_level_1,
    analyzed_table.grouping_level_2,
    time_period,
    time_period_utc
 FROM(
     SELECT
         original_table.*,
    original_table."country" AS grouping_level_1,
    original_table."state" AS grouping_level_2,
    TRUNC(CAST(original_table."" AS DATE)) AS time_period,
    CAST(TRUNC(CAST(original_table."" AS DATE)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
     FROM "<target_schema>"."<target_table>" original_table
 ) analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

PostgreSQL

{% import '/dialects/postgresql.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table."target_column") AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    CAST(analyzed_table."" AS date) AS time_period,
    CAST((CAST(analyzed_table."" AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_postgresql_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Redshift

{% import '/dialects/redshift.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table."target_column") AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    CAST(analyzed_table."" AS date) AS time_period,
    CAST((CAST(analyzed_table."" AS date)) AS TIMESTAMP WITH TIME ZONE) AS time_period_utc
FROM "your_redshift_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

Snowflake

{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table."target_column") AS actual_value,
    analyzed_table."country" AS grouping_level_1,
    analyzed_table."state" AS grouping_level_2,
    CAST(analyzed_table."" AS date) AS time_period,
    TO_TIMESTAMP(CAST(analyzed_table."" AS date)) AS time_period_utc
FROM "your_snowflake_database"."<target_schema>"."<target_table>" AS analyzed_table
GROUP BY grouping_level_1, grouping_level_2, time_period, time_period_utc
ORDER BY grouping_level_1, grouping_level_2, time_period, time_period_utc

SQL Server

{% import '/dialects/sqlserver.sql.jinja2' as lib with context -%}
SELECT
    SUM({{ lib.render_target_column('analyzed_table')}}) AS actual_value
    {{- lib.render_data_grouping_projections('analyzed_table') }}
    {{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
SELECT
    SUM(analyzed_table.[target_column]) AS actual_value,
    analyzed_table.[country] AS grouping_level_1,
    analyzed_table.[state] AS grouping_level_2,
    CAST(analyzed_table.[] AS date) AS time_period,
    CAST((CAST(analyzed_table.[] AS date)) AS DATETIME) AS time_period_utc
FROM [your_sql_server_database].[<target_schema>].[<target_table>] AS analyzed_table
GROUP BY analyzed_table.[country], analyzed_table.[state], CAST(analyzed_table.[] AS date), CAST(analyzed_table.[] AS date)
ORDER BY grouping_level_1, grouping_level_2, CAST(analyzed_table.[] AS date)