Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ repos:
hooks:
- id: black
args: [--line-length=120, --skip-string-normalization]
# sqlfmt hook, pinned to v0.10.1. Paths matching the `exclude` pattern below
# (the snippets directory) are not passed to the hook.
- repo: https://github.com/tconbeer/sqlfmt
rev: v0.10.1
hooks:
- id: sqlfmt
language_version: python
exclude: 'rasgotransforms/rasgotransforms/snippets'
ci:
autofix_commit_msg: '[pre-commit.ci] auto fixes from pre-commit.com hooks'
autofix_prs: true
Expand Down
55 changes: 28 additions & 27 deletions rasgotransforms/rasgotransforms/transforms/aggregate/aggregate.sql
Original file line number Diff line number Diff line change
@@ -1,38 +1,39 @@
{#
  Shallow-copy `aggregations`; everything below adds and removes keys on this copy.

  'numeric columns' placeholder: when the key is present and its list is non-empty,
  that list of aggregations is appended for every column of `source_table` that has
  no entry of its own and whose lower-cased type is in the numeric type list. The
  placeholder key is then popped.

  Presumably get_columns() returns a column-name -> type mapping (it is iterated
  with .items()) — verify against its definition.

  NOTE(review): the set/for/pop statements appear twice in a row here, which looks
  like the before/after sides of a formatting diff flattened together — confirm
  against the real file.
#}
{%- set aggregations = aggregations.copy() %}
{%- if 'numeric columns' in aggregations.keys() and aggregations['numeric columns']|length > 0 %}
{%- set all_columns = get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['numeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('numeric columns') %}
{%- set all_columns = get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['numeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('numeric columns') %}
{%- endif -%}

{#
  'nonnumeric columns' placeholder: when the key is present and its list is
  non-empty, that list of aggregations is appended for every column that has no
  entry of its own and whose lower-cased type is NOT in the numeric type list.
  The placeholder key is then popped.

  `all_columns` is reused when it is already defined; otherwise it is fetched
  with get_columns(source_table).

  NOTE(review): the set/for/pop statements appear twice in a row here, which looks
  like the before/after sides of a formatting diff flattened together — confirm
  against the real file.
#}
{%- if 'nonnumeric columns' in aggregations.keys() and aggregations['nonnumeric columns']|length > 0 %}
{%- set all_columns = all_columns if all_columns is defined else get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower not in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['nonnumeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('nonnumeric columns') %}
{%- set all_columns = all_columns if all_columns is defined else get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower not in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['nonnumeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('nonnumeric columns') %}
{%- endif -%}

{#-
  Renders the grouped SELECT:
    * every `group_by` item, each followed by a comma;
    * one expression per (column, aggregation) pair in `aggregations`.
      An aggregation containing ' DISTINCT' (checked case-insensitively) is
      upper-cased, has ' DISTINCT' removed, and is rendered as AGG(DISTINCT col)
      with alias <col>_<AGG>DISTINCT; any other aggregation is rendered as
      agg(col) with alias <col>_<agg>.
    * a comma after every expression except the last one of the last column.
  The query reads from `source_table` and groups by the joined `group_by` list.

  NOTE(review): upper-case and lower-case variants of the same statements are
  interleaved below, which looks like the before/after sides of a formatting
  diff flattened together — confirm against the real file.
-#}
SELECT
{%- for group_item in group_by %}
{{ group_item }},
{%- endfor -%}
select
{%- for group_item in group_by %} {{ group_item }}, {%- endfor -%}

{%- for col, aggs in aggregations.items() %}
{%- for col, aggs in aggregations.items() %}
{#- keep a handle on the column loop so the inner loop can detect the final expression #}
{%- set outer_loop = loop -%}
{%- for agg in aggs %}
{%- if ' DISTINCT' in agg|upper %}
{{ agg|upper|replace(" DISTINCT", "") }}(DISTINCT {{ col }}) as {{ col ~ '_' ~ agg|upper|replace(" DISTINCT", "") ~ 'DISTINCT'}}{{ '' if loop.last and outer_loop.last else ',' }}
{%- else %}
{{ agg }}({{ col }}) as {{ col + '_' + agg }}{{ '' if loop.last and outer_loop.last else ',' }}
{%- endif %}
{%- if ' DISTINCT' in agg|upper %}
{{ agg|upper|replace(" DISTINCT", "") }} (distinct {{ col }})
as {{ col ~ '_' ~ agg|upper|replace(" DISTINCT", "") ~ 'DISTINCT' }}{{ '' if loop.last and outer_loop.last else ',' }}
{%- else %}
{{ agg }} (
{{ col }}
) as {{ col + '_' + agg }}{{ '' if loop.last and outer_loop.last else ',' }}
{%- endif %}
{%- endfor -%}
{%- endfor %}
FROM {{ source_table }}
GROUP BY {{ group_by | join(', ') }}
{%- endfor %}
from {{ source_table }}
group by {{ group_by | join(', ') }}
Original file line number Diff line number Diff line change
@@ -1,116 +1,125 @@
{#
  Shallow-copy `aggregations`; everything below adds and removes keys on this copy.

  'numeric columns' placeholder: when the key is present and its list is non-empty,
  that list of aggregations is appended for every column of `source_table` that has
  no entry of its own and whose lower-cased type is in the numeric type list. The
  placeholder key is then popped.

  Presumably get_columns() returns a column-name -> type mapping (it is iterated
  with .items()) — verify against its definition.

  NOTE(review): the set/for/pop statements appear twice in a row here, which looks
  like the before/after sides of a formatting diff flattened together — confirm
  against the real file.
#}
{%- set aggregations = aggregations.copy() %}
{%- if 'numeric columns' in aggregations.keys() and aggregations['numeric columns']|length > 0 %}
{%- set all_columns = get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['numeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('numeric columns') %}
{%- set all_columns = get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['numeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('numeric columns') %}
{%- endif -%}

{#
  'nonnumeric columns' placeholder: when the key is present and its list is
  non-empty, that list of aggregations is appended for every column that has no
  entry of its own and whose lower-cased type is NOT in the numeric type list.
  The placeholder key is then popped.

  `all_columns` is reused when it is already defined; otherwise it is fetched
  with get_columns(source_table).

  NOTE(review): the set/for/pop statements appear twice in a row here, which looks
  like the before/after sides of a formatting diff flattened together — confirm
  against the real file.
#}
{%- if 'nonnumeric columns' in aggregations.keys() and aggregations['nonnumeric columns']|length > 0 %}
{%- set all_columns = all_columns if all_columns is defined else get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower not in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['nonnumeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('nonnumeric columns') %}
{%- set all_columns = all_columns if all_columns is defined else get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower not in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['nonnumeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('nonnumeric columns') %}
{%- endif -%}

{#
  Split out the aggregations that need their own CTE. For each column, an
  aggregation whose upper-cased name contains 'MEDIAN' is recorded in
  `median_aggs[col]`; otherwise one containing 'MODE' is recorded in
  `mode_aggs[col]`. Each dict keeps a single aggregation per column (a later
  match overwrites an earlier one).

  NOTE(review): the inner `for agg in aggs` loop appears twice in a row here,
  which looks like the before/after sides of a formatting diff flattened
  together — confirm against the real file.
#}
{%- set median_aggs = dict() -%}
{%- set mode_aggs = dict() -%}
{%- for col, aggs in aggregations.items() -%}
{%- for agg in aggs -%}
{%- if 'MEDIAN' in agg|upper -%}
{%- set _ = median_aggs.update({col: agg}) -%}
{%- elif 'MODE' in agg|upper -%}
{%- set _ = mode_aggs.update({col: agg}) -%}
{%- endif -%}
{%- endfor -%}
{%- for agg in aggs -%}
{%- if 'MEDIAN' in agg|upper -%}
{%- set _ = median_aggs.update({col: agg}) -%}
{%- elif 'MODE' in agg|upper -%}
{%- set _ = mode_aggs.update({col: agg}) -%}
{%- endif -%}
{%- endfor -%}
{%- endfor -%}

{#
  Median CTE, emitted only when at least one median aggregation was collected.
  It opens the WITH list and selects the `group_by` columns plus, for each median
  column, percentile_cont(col, 0.5) computed over a window partitioned by the
  `group_by` columns and aliased <col>_MEDIAN. SELECT DISTINCT removes the
  repeated per-row copies of those window values. The closing `),` leaves the
  WITH list open for the CTEs rendered after this block.

  NOTE(review): upper-case and lower-case variants of the same statements are
  interleaved below, which looks like the before/after sides of a formatting
  diff flattened together — confirm against the real file.
#}
{%- if median_aggs -%}
WITH MEDIAN_CTE AS(
SELECT
DISTINCT {{ group_by | join(', ') }}
with
median_cte as (
select distinct
{{ group_by | join(', ') }}
{%- for med_col, med_agg in median_aggs.items() %}
,PERCENTILE_CONT( {{ med_col }}, 0.5) OVER (PARTITION BY {{ group_by | join(', ') }}) AS {{ med_col }}_MEDIAN
,
percentile_cont({{ med_col }}, 0.5) over (
partition by {{ group_by | join(', ') }}
) as {{ med_col }}_median
{%- endfor %}
FROM {{ source_table }}
from {{ source_table }}
),
{%- endif -%}

{#
  Mode CTEs, one per column in `mode_aggs`, each named <col>_CTE.
  WITH is emitted here only when the median block did not already emit it.
  Each CTE groups `source_table` by the `group_by` columns plus the mode column,
  numbers the rows of each `group_by` partition by count(col) descending, and
  keeps the row numbered 1 as <col>_MODE.

  NOTE(review): the ordering is by count only, so which value wins when two
  values tie is not pinned down by this query.

  NOTE(review): upper-case and lower-case variants of the same statements are
  interleaved below, which looks like the before/after sides of a formatting
  diff flattened together — confirm against the real file.
#}
{%- if mode_aggs -%}
{%- if not median_aggs %}
WITH
{%- endif %}
{%- if not median_aggs %}
with
{%- endif %}
{%- for mode_col, mode_agg in mode_aggs.items() %}
{{ mode_col }}_CTE AS (
SELECT
{{ group_by | join(',\n') }}
,{{ mode_col }} AS {{ mode_col }}_MODE
FROM (
SELECT
{{ group_by | join(', ') }}
,{{ mode_col }}
,ROW_NUMBER() OVER (PARTITION BY {{ group_by | join(', ') }} ORDER BY COUNT({{ mode_col }}) DESC) rn
FROM {{ source_table }}
GROUP BY {{ group_by | join(', ') }}, {{ mode_col }}
{{ mode_col }}_cte as (
select {{ group_by | join(',\n') }},{{ mode_col }} as {{ mode_col }}_mode
from
(
select
{{ group_by | join(', ') }},
{{ mode_col }},
row_number() over (
partition by {{ group_by | join(', ') }}
order by count({{ mode_col }}) desc
) rn
from {{ source_table }}
group by {{ group_by | join(', ') }}, {{ mode_col }}
)
WHERE rn = 1
),
where rn = 1
),
{%- endfor %}
{%- endif -%}

{#
  AGGS CTE and final query.

  WITH is emitted here only when neither the median nor the mode block emitted
  it. The AGGS CTE selects the `group_by` columns plus every aggregation whose
  upper-cased name contains neither 'MEDIAN' nor 'MODE', each preceded by a
  comma. An aggregation containing ' DISTINCT' (checked case-insensitively) is
  rendered as agg(DISTINCT col) with alias <col>_<agg>DISTINCT; here the
  ' DISTINCT' text is removed with a case-sensitive replace and the aggregation
  name is not upper-cased.

  The final SELECT returns a.* from AGGS, adds one column per median and mode
  aggregation, and LEFT JOINs the median CTE and each mode CTE on every
  `group_by` column.

  NOTE(review): median columns are referenced as <col>_<med_agg>, while the
  median CTE aliases them <col>_MEDIAN; the two agree only when the aggregation
  is exactly "median" (subject to the engine's identifier case rules) — confirm.

  NOTE(review): upper-case and lower-case variants of the same statements are
  interleaved below, which looks like the before/after sides of a formatting
  diff flattened together — confirm against the real file.
#}
{%- if not (median_aggs or mode_aggs) %}
WITH
with
{%- endif %}
AGGS AS (
SELECT
{{ group_by | join(',\n') }}
{%- for col, aggs in aggregations.items() %}
aggs as (
select
{{ group_by | join(',\n') }}
{%- for col, aggs in aggregations.items() %}
{%- set outer_loop = loop -%}
{%- for agg in aggs %}
{%- if ('MEDIAN' not in agg|upper and 'MODE' not in agg|upper) %}
{%- if ' DISTINCT' in agg|upper %}
,{{ agg|replace(" DISTINCT", "") }}(DISTINCT {{ col }}) as {{ col ~ '_' ~ agg|replace(" DISTINCT", "") ~ 'DISTINCT'}}
{%- else %}
,{{ agg }}({{ col }}) as {{ col + '_' + agg }}
{%- endif %}
{%- endif %}
{%- if ('MEDIAN' not in agg|upper and 'MODE' not in agg|upper) %}
{%- if ' DISTINCT' in agg|upper %}
,
{{ agg|replace(" DISTINCT", "") }} (
distinct {{ col }}
) as {{ col ~ '_' ~ agg|replace(" DISTINCT", "") ~ 'DISTINCT' }}
{%- else %},{{ agg }} ({{ col }}) as {{ col + '_' + agg }}
{%- endif %}
{%- endif %}
{%- endfor -%}
{%- endfor %}
FROM {{ source_table }}
GROUP BY {{ group_by | join(', ') }}
)
SELECT
a.*
{%- if median_aggs %}
{%- endfor %}
from {{ source_table }}
group by {{ group_by | join(', ') }}
)
select
a.*
{%- if median_aggs %}
{%- for med_col, med_agg in median_aggs.items() %}
,med.{{ med_col }}_{{ med_agg }}
, med.{{ med_col }}_{{ med_agg }}
{%- endfor %}
{%- endif %}
{%- if mode_aggs %}
{%- endif %}
{%- if mode_aggs %}
{%- for mode_col, mode_agg in mode_aggs.items() %}
,{{ mode_col }}_CTE.{{ mode_col }}_MODE
,{{ mode_col }}_cte.{{ mode_col }}_mode
{%- endfor %}
{%- endif %}
FROM AGGS a
{%- endif %}
from aggs a
{#- join conditions: one equality per group_by column, chained with AND except after the last #}
{%- if median_aggs %}
LEFT JOIN MEDIAN_CTE med
ON
{%- for group_col in group_by %}
{{'a.' + group_col + ' = med.' + group_col + (' AND' if not loop.last else '')}}
left join
median_cte med
on {%- for group_col in group_by %}
{{ 'a.' + group_col + ' = med.' + group_col + (' AND' if not loop.last else '') }}
{%- endfor %}
{%- endif %}
{%- if mode_aggs %}
{%- for mode_col, mode_agg in mode_aggs.items() %}
LEFT JOIN {{ mode_col }}_CTE
ON
{%- for group_col in group_by %}
a.{{ group_col }} = {{ mode_col }}_CTE.{{ group_col }} {{ 'AND ' if not loop.last else '' }}
{%- endfor %}
{%- for mode_col, mode_agg in mode_aggs.items() %}
left join
{{ mode_col }}_cte
on {%- for group_col in group_by %}
a.{{ group_col }}
= {{ mode_col }}_cte.{{ group_col }} {{ 'AND ' if not loop.last else '' }}
{%- endfor %}
{%- endfor %}
{%- endif %}
Original file line number Diff line number Diff line change
@@ -1,37 +1,38 @@
{#
  Shallow-copy `aggregations`; everything below adds and removes keys on this copy.

  'numeric columns' placeholder: when the key is present and its list is non-empty,
  that list of aggregations is appended for every column of `source_table` that has
  no entry of its own and whose lower-cased type is in the numeric type list. The
  placeholder key is then popped.

  Presumably get_columns() returns a column-name -> type mapping (it is iterated
  with .items()) — verify against its definition.

  NOTE(review): the set/for/pop statements appear twice in a row here, which looks
  like the before/after sides of a formatting diff flattened together — confirm
  against the real file.
#}
{%- set aggregations = aggregations.copy() %}
{%- if 'numeric columns' in aggregations.keys() and aggregations['numeric columns']|length > 0 %}
{%- set all_columns = get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['numeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('numeric columns') %}
{%- set all_columns = get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['numeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('numeric columns') %}
{%- endif -%}

{#
  'nonnumeric columns' placeholder: when the key is present and its list is
  non-empty, that list of aggregations is appended for every column that has no
  entry of its own and whose lower-cased type is NOT in the numeric type list.
  The placeholder key is then popped.

  `all_columns` is reused when it is already defined; otherwise it is fetched
  with get_columns(source_table).

  NOTE(review): the set/for/pop statements appear twice in a row here, which looks
  like the before/after sides of a formatting diff flattened together — confirm
  against the real file.
#}
{%- if 'nonnumeric columns' in aggregations.keys() and aggregations['nonnumeric columns']|length > 0 %}
{%- set all_columns = all_columns if all_columns is defined else get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower not in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['nonnumeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('nonnumeric columns') %}
{%- set all_columns = all_columns if all_columns is defined else get_columns(source_table) %}
{%- for column, column_type in all_columns.items() %}
{%- if column not in aggregations.keys() and column_type|lower not in ['int', 'integer', 'bigint', 'smallint', 'number', 'numeric', 'float', 'float4', 'float8', 'decimal', 'double precision', 'real'] %}
{%- do aggregations.setdefault(column, []).extend(aggregations['nonnumeric columns']) %}
{%- endif %}
{%- endfor %}
{%- set _ = aggregations.pop('nonnumeric columns') %}
{%- endif -%}

{#-
  Renders the grouped SELECT:
    * every `group_by` item, each followed by a comma;
    * one expression per (column, aggregation) pair, with each column's
      aggregation list passed through the `unique` filter first.
      An aggregation containing ' DISTINCT' (checked case-insensitively) is
      upper-cased, has ' DISTINCT' removed, and is rendered as AGG(DISTINCT col)
      with alias <col>_<AGG>DISTINCT; any other aggregation is rendered as
      agg(col) with alias <col>_<agg>.
    * a comma after every expression except the last one of the last column.
  The query reads from `source_table` and groups by the joined `group_by` list.

  NOTE(review): upper-case and lower-case variants of the same statements are
  interleaved below, which looks like the before/after sides of a formatting
  diff flattened together — confirm against the real file.
-#}
SELECT
{%- for group_item in group_by %}
{{ group_item }},
{%- endfor %}
{%- for col, aggs in aggregations.items() %}
select
{%- for group_item in group_by %} {{ group_item }}, {%- endfor %}
{%- for col, aggs in aggregations.items() %}
{#- keep a handle on the column loop so the inner loop can detect the final expression #}
{%- set outer_loop = loop -%}
{%- for agg in aggs|unique %}
{%- if ' DISTINCT' in agg|upper %}
{{ agg|upper|replace(" DISTINCT", "") }}(DISTINCT {{ col }}) as {{ col ~ '_' ~ agg|upper|replace(" DISTINCT", "") ~ 'DISTINCT'}}{{ '' if loop.last and outer_loop.last else ',' }}
{%- else %}
{{ agg }}({{ col }}) as {{ col + '_' + agg }}{{ '' if loop.last and outer_loop.last else ',' }}
{%- endif %}
{%- if ' DISTINCT' in agg|upper %}
{{ agg|upper|replace(" DISTINCT", "") }} (distinct {{ col }})
as {{ col ~ '_' ~ agg|upper|replace(" DISTINCT", "") ~ 'DISTINCT' }}{{ '' if loop.last and outer_loop.last else ',' }}
{%- else %}
{{ agg }} (
{{ col }}
) as {{ col + '_' + agg }}{{ '' if loop.last and outer_loop.last else ',' }}
{%- endif %}
{%- endfor -%}
{%- endfor %}
FROM {{ source_table }}
GROUP BY {{ group_by | join(', ') }}
{%- endfor %}
from {{ source_table }}
group by {{ group_by | join(', ') }}
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
{#-
  Renders one listagg(...) column per entry in `agg_columns`, grouped by the
  `group_by` columns of `source_table`.
    * `distinct` (truthy) adds the DISTINCT keyword inside listagg;
    * `sep` is emitted as the quoted separator literal;
    * `order` is appended after the ORDER BY column inside WITHIN GROUP;
    * each result is aliased <agg_column>_listagg.

  NOTE(review): the query appears twice below (upper-case, then lower-case),
  which looks like the before/after sides of a formatting diff flattened
  together — confirm against the real file.
-#}
SELECT {{ group_by | join(', ') }}
{%- for agg_column in agg_columns %}
, listagg({{ 'distinct ' if distinct else ''}} {{agg_column}}, '{{sep}}')
WITHIN group (order by {{agg_column}} {{order}}) as {{agg_column}}_listagg
{%- endfor %}
FROM {{ source_table }}
GROUP BY {{ group_by | join(', ') }}
select
{{ group_by | join(', ') }}
{%- for agg_column in agg_columns %}
,
listagg(
{{ 'distinct ' if distinct else '' }} {{ agg_column }}, '{{sep}}'
) within group (order by {{ agg_column }} {{ order }}) as {{ agg_column }}_listagg
{%- endfor %}
from {{ source_table }}
group by {{ group_by | join(', ') }}
8 changes: 5 additions & 3 deletions rasgotransforms/rasgotransforms/transforms/apply/apply.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{#-
  Default body of the apply transform: a pass-through select from `source_table`
  followed by a raise_exception(...) call carrying the message below.
  Presumably raise_exception aborts rendering so the placeholder can never run
  as-is — verify against its definition.

  NOTE(review): the template appears twice below (upper-case, then lower-case),
  which looks like the before/after sides of a formatting diff flattened
  together — confirm against the real file.
-#}
{# Placeholder code. Will be replaced by user supplied template #}
SELECT * FROM {{ source_table }}
{{ raise_exception('Placeholder code must be replaced by user supplied template') }}
{# Placeholder code. Will be replaced by user supplied template #}
select *
from
{{ source_table }}
{{ raise_exception('Placeholder code must be replaced by user supplied template') }}
Loading