Skip to content

Commit 33d1a3c

Browse files
authored
Merge pull request #218 from ScalefreeCOM/dev
Pulling in the latest dev-branch
2 parents fa7c330 + b78dcf7 commit 33d1a3c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+982
-630
lines changed

dbt_project.yml

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,18 @@ vars:
4848
#Ghost Record Configuration
4949
datavault4dbt.beginning_of_all_times: {"bigquery":"0001-01-01T00-00-01","snowflake":"0001-01-01T00:00:01", "exasol": "0001-01-01 00:00:01", "postgres": "0001-01-01 00:00:01", "redshift": "0001-01-01 00:00:01", "synapse": "1901-01-01T00:00:01"}
5050
datavault4dbt.end_of_all_times: {"bigquery":"8888-12-31T23-59-59","snowflake":"8888-12-31T23:59:59", "exasol": "8888-12-31 23:59:59", "postgres": "8888-12-31 23:59:59", "redshift": "8888-12-31 23:59:59", "synapse": "8888-12-31T23:59:59"}
51-
datavault4dbt.timestamp_format: {"bigquery":"%Y-%m-%dT%H-%M-%S","snowflake":"YYYY-MM-DDTHH24:MI:SS", "exasol": "YYYY-mm-dd HH:MI:SS", "postgres": "%Y-%m-%dT%H-%M-%S", "redshift": "YYYY-MM-DD HH24:MI:SS", "synapse": 126}
51+
datavault4dbt.timestamp_format: {"bigquery":"%Y-%m-%dT%H-%M-%S","snowflake":"YYYY-MM-DDTHH24:MI:SS", "exasol": "YYYY-mm-dd HH:MI:SS", "postgres": "YYYY-MM-DD HH24:MI:SS", "redshift": "YYYY-MM-DD HH24:MI:SS", "synapse": 126}
5252

53-
datavault4dbt.beginning_of_all_times_date: {"bigquery":"0001-01-01","snowflake":"0001-01-01", "exasol": "0001-01-01", "postgres": "0001-01-01", "redshift": "0001-01-01", "synapse": "1901-01-01"}
54-
datavault4dbt.end_of_all_times_date: {"bigquery":"8888-12-31","snowflake":"8888-12-31", "exasol": "8888-12-31", "postgres": "8888-12-31", "redshift": "8888-12-31", "synapse": "8888-12-31"}
55-
datavault4dbt.date_format: {"bigquery":"%Y-%m-%d","snowflake":"YYYY-MM-DD", "exasol": "YYYY-mm-dd", "postgres": "%Y-%m-%d", "redshift": "YYYY-MM-DD", "synapse": "yyyy-MM-dd"}
53+
#datavault4dbt.beginning_of_all_times_date: {"bigquery":"0001-01-01","snowflake":"0001-01-01", "exasol": "0001-01-01", "postgres": "0001-01-01", "redshift": "0001-01-01", "synapse": "1901-01-01"}
54+
#datavault4dbt.end_of_all_times_date: {"bigquery":"8888-12-31","snowflake":"8888-12-31", "exasol": "8888-12-31", "postgres": "8888-12-31", "redshift": "8888-12-31", "synapse": "8888-12-31"}
55+
#datavault4dbt.date_format: {"bigquery":"%Y-%m-%d","snowflake":"YYYY-MM-DD", "exasol": "YYYY-mm-dd", "postgres": "YYYY-MM-DD", "redshift": "YYYY-MM-DD", "synapse": "yyyy-MM-dd"}
5656

5757
datavault4dbt.default_unknown_rsrc: 'SYSTEM'
5858
datavault4dbt.default_error_rsrc: 'ERROR'
59-
datavault4dbt.rsrc_default_dtype: 'VARCHAR(255)'
60-
datavault4dbt.stg_default_dtype: 'VARCHAR(255)'
61-
datavault4dbt.derived_columns_default_dtype: 'VARCHAR(255)'
59+
datavault4dbt.rsrc_default_dtype: {"bigquery":"STRING","snowflake":"VARCHAR", "exasol": "VARCHAR (2000000) UTF8", "postgres": "VARCHAR", "redshift": "VARCHAR", "synapse": "VARCHAR"}
60+
datavault4dbt.timestamp_default_dtype: {"bigquery":"TIMESTAMP","snowflake":"TIMESTAMP_TZ", "exasol": "TIMESTAMP(3) WITH LOCAL TIME ZONE", "postgres": "TIMESTAMPTZ", "redshift": "TIMESTAMPTZ", "synapse": "datetimeoffset"}
61+
datavault4dbt.stg_default_dtype: {"bigquery":"STRING","snowflake":"VARCHAR", "exasol": "VARCHAR (2000000) UTF8", "postgres": "VARCHAR", "redshift": "VARCHAR", "synapse": "VARCHAR"}
62+
datavault4dbt.derived_columns_default_dtype: {"bigquery":"STRING","snowflake":"VARCHAR", "exasol": "VARCHAR (2000000) UTF8", "postgres": "VARCHAR", "redshift": "VARCHAR", "synapse": "VARCHAR"}
6263

6364
#Datatype specific default values
6465
datavault4dbt.error_value__STRING: '(error)'
@@ -81,4 +82,4 @@ models:
8182
+materialized: view
8283
raw_vault:
8384
+schema: <name of your RDV schema>
84-
+materialized: table
85+
+materialized: table

macros/internal/helpers/stage_processing_macros.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,4 +123,4 @@
123123
{%- endif %}
124124
{%- endfor -%}
125125

126-
{%- endmacro -%}
126+
{%- endmacro -%}

macros/internal/metadata_processing/escape_column_names.sql

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,30 @@
139139
{%- set escape_char_left = var('escape_char_left', "") -%}
140140
{%- set escape_char_right = var('escape_char_right', "") -%}
141141

142+
{%- set escaped_column_name = escape_char_left ~ column | lower | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right | indent(4) -%}
143+
144+
{%- do return(escaped_column_name) -%}
145+
146+
{%- endmacro -%}
147+
148+
{%- macro redshift__escape_column_name(column) -%}
{# Builds an escaped, lower-cased version of a single column name and returns it.
   Parameter:
     column - the column name to escape.
   Returns:
     the column name lower-cased, stripped of any escape characters it already contained,
     trimmed, and wrapped in the configured left/right escape characters. #}
149+
150+
{# Escape characters are read from the project vars 'escape_char_left' / 'escape_char_right';
   when a var is not set, a double quote is used. #}
{%- set escape_char_left = var('escape_char_left', '"') -%}
151+
{%- set escape_char_right = var('escape_char_right', '"') -%}
152+
153+
{# Existing escape characters are removed before wrapping so the name is not escaped twice.
   NOTE(review): Jinja filters bind tighter than '~', so the lower/replace/trim chain applies to
   'column' only and 'indent(4)' applies to 'escape_char_right' only - confirm this is intended. #}
{%- set escaped_column_name = escape_char_left ~ column | lower | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right | indent(4) -%}
154+
155+
{# The result is handed back via return() rather than rendered as template output. #}
{%- do return(escaped_column_name) -%}
156+
157+
{%- endmacro -%}
158+
159+
{%- macro exasol__escape_column_name(column) -%}
{# Builds an escaped, upper-cased version of a single column name and returns it.
   Parameter:
     column - the column name to escape.
   Returns:
     the column name upper-cased, stripped of any escape characters it already contained,
     trimmed, and wrapped in the configured left/right escape characters. #}
160+
161+
{# Escape characters are read from the project vars 'escape_char_left' / 'escape_char_right';
   when a var is not set, an empty string is used, i.e. the name is not wrapped at all. #}
{%- set escape_char_left = var('escape_char_left', '') -%}
162+
{%- set escape_char_right = var('escape_char_right', '') -%}
163+
142164
{# Unlike the lower-casing variant of this macro, the name is upper-cased here.
   NOTE(review): Jinja filters bind tighter than '~', so the upper/replace/trim chain applies to
   'column' only and 'indent(4)' applies to 'escape_char_right' only - confirm this is intended. #}
{%- set escaped_column_name = escape_char_left ~ column | upper | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right | indent(4) -%}
143165

144166
{# The result is handed back via return() rather than rendered as template output. #}
{%- do return(escaped_column_name) -%}
145167

146-
{%- endmacro -%}
168+
{%- endmacro -%}

macros/staging/bigquery/stage.sql

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,10 @@
177177
{% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %}
178178

179179
{# Setting the rsrc default datatype and length #}
180-
{% set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') %}
180+
{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') %}
181+
182+
{# Setting the ldts default datatype #}
183+
{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %}
181184

182185
WITH
183186

@@ -205,7 +208,7 @@ source_data AS (
205208
ldts_rsrc_data AS (
206209

207210
SELECT
208-
{{ ldts }} AS {{ load_datetime_col_name}},
211+
CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ load_datetime_col_name }},
209212
CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ record_source_col_name }}
210213
{%- if datavault4dbt.is_something(sequence) %},
211214
{{ sequence }} AS edwSequence
@@ -224,7 +227,7 @@ ldts_rsrc_data AS (
224227

225228
{%- set columns_without_excluded_columns_tmp = [] -%}
226229
{%- for column in columns_without_excluded_columns -%}
227-
{%- if column.name not in derived_column_names -%}
230+
{%- if column.name | lower not in derived_column_names | map('lower') -%}
228231
{%- do columns_without_excluded_columns_tmp.append(column) -%}
229232
{%- endif -%}
230233
{%- endfor -%}
@@ -369,7 +372,7 @@ ma_hashdiff_prep AS (
369372
SELECT
370373

371374
{% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(tmp_ns.hashdiff_dict) -%}
372-
375+
373376
{{ multi_active_config['main_hashkey_column'] }},
374377
{# Generates only all hashdiffs. #}
375378
{{- datavault4dbt.hash_columns(columns=processed_hash_columns, multi_active_key=multi_active_config['multi_active_key'], main_hashkey_column=multi_active_config['main_hashkey_column']) | indent(4) }},
@@ -378,6 +381,10 @@ ma_hashdiff_prep AS (
378381
FROM main_hashkey_generation
379382
GROUP BY {{ multi_active_config['main_hashkey_column'] }}, {{ ldts_alias }}
380383

384+
385+
{% do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.main_hashkey_dict)) -%} {# Add main hashkey to list of processed columns, otherwise ghost records don't get created #}
386+
387+
381388
),
382389

383390
hashed_columns AS (
@@ -389,9 +396,11 @@ hashed_columns AS (
389396
{# Generates only all remaining hashkeys, that are no hashdiffs #}
390397

391398
{%- if datavault4dbt.is_something(processed_remaining_hash_columns) %}
392-
{{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }}, {# All remaining hashed_columns get calculated. #}
399+
{{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }},
400+
{%- do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.remaining_hashed_columns)) -%} {# All remaining hashed_columns get calculated. #}
393401
{% endif -%}
394402

403+
395404
{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(tmp_ns.hashdiff_names)) }}, {# All MA Hashdiffs are selected. #}
396405
main_hashkey_generation.{{ multi_active_config['main_hashkey_column'] }} {# Main Hashkey selected. #}
397406

macros/staging/derived_column_datatypes.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
{%- if not datavault4dbt.is_attribute(column_value) -%}
2525
{# If the value is a static value, it is not an attribute and no datatype needs to be detected. Instead a default datatype is applied. #}
2626

27-
{%- set datatype = var('datavault4dbt.derived_columns_default_dtype', 'STRING') -%}
27+
{%- set datatype = datavault4dbt.string_default_dtype(type='derived_columns') -%}
2828
{%- set value = column_value -%}
2929
{%- set col_size = "" -%}
3030

macros/staging/exasol/stage.sql

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,10 @@
172172
{% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %}
173173

174174
{# Setting the rsrc default datatype and length #}
175-
{% set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'VARCHAR (2000000) UTF8') %}
175+
{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') %}
176+
177+
{# Setting the ldts default datatype #}
178+
{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %}
176179

177180
WITH
178181

@@ -194,7 +197,7 @@ source_data AS (
194197
ldts_rsrc_data AS (
195198

196199
SELECT
197-
{{ ldts }} AS {{ load_datetime_col_name}},
200+
CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ load_datetime_col_name }},
198201
CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ record_source_col_name }}
199202
{%- if datavault4dbt.is_something(sequence) %},
200203
{{ sequence }} AS edwSequence
@@ -209,6 +212,14 @@ ldts_rsrc_data AS (
209212

210213
{%- set last_cte = "ldts_rsrc_data" -%}
211214
{%- set final_columns_to_select = alias_columns + final_columns_to_select %}
215+
216+
{%- set columns_without_excluded_columns_tmp = [] -%}
217+
{%- for column in columns_without_excluded_columns -%}
218+
{%- if column.name | lower not in derived_column_names | map('lower') -%}
219+
{%- do columns_without_excluded_columns_tmp.append(column) -%}
220+
{%- endif -%}
221+
{%- endfor -%}
222+
{%- set columns_without_excluded_columns = columns_without_excluded_columns_tmp |list -%}
212223
),
213224

214225
{%- if datavault4dbt.is_something(missing_columns) %}
@@ -302,6 +313,8 @@ prejoined_columns AS (
302313
{# Adding derived columns to the selection #}
303314
derived_columns AS (
304315

316+
{%- set final_columns_to_select = datavault4dbt.process_columns_to_select(final_columns_to_select, derived_column_names) -%}
317+
305318
SELECT
306319
{% if final_columns_to_select | length > 0 -%}
307320
{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }},
@@ -318,15 +331,16 @@ derived_columns AS (
318331
{# Generating Hashed Columns (hashkeys and hashdiffs for Hubs/Links/Satellites) #}
319332
{% if datavault4dbt.is_something(multi_active_config) %}
320333

321-
{%- set tmp_ns = namespace(main_hashkey_dict={}, remaining_hashed_columns={}, hashdiff_names=[]) -%}
334+
{%- set tmp_ns = namespace(main_hashkey_dict={}, remaining_hashed_columns={}, hashdiff_names=[], hashdiff_dict={}) -%}
322335

323336
{%- for column in hashed_columns.keys() -%}
324-
{%- if column | lower == multi_active_config['main_hashkey_column']| lower and not hashed_columns[column].is_hashdiff -%}
337+
{%- if column == multi_active_config['main_hashkey_column'] and not hashed_columns[column].is_hashdiff -%}
325338
{%- do tmp_ns.main_hashkey_dict.update({column: hashed_columns[column]}) -%}
326339
{% elif column != multi_active_config['main_hashkey_column'] and not hashed_columns[column].is_hashdiff -%}
327340
{%- do tmp_ns.remaining_hashed_columns.update({column: hashed_columns[column]}) -%}
328341
{%- elif hashed_columns[column].is_hashdiff -%}
329342
{%- do tmp_ns.hashdiff_names.append(column) -%}
343+
{%- do tmp_ns.hashdiff_dict.update({column: hashed_columns[column]}) -%}
330344
{%- endif -%}
331345
{%- endfor -%}
332346

@@ -345,31 +359,38 @@ ma_hashdiff_prep AS (
345359

346360
SELECT
347361

348-
{% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(hashed_columns) -%}
362+
{% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(tmp_ns.hashdiff_dict) -%}
349363

364+
{{ multi_active_config['main_hashkey_column'] }},
350365
{# Generates only all hashdiffs. #}
351366
{{- datavault4dbt.hash_columns(columns=processed_hash_columns, multi_active_key=multi_active_config['multi_active_key'], main_hashkey_column=multi_active_config['main_hashkey_column']) | indent(4) }},
352367
{{ ldts_alias }}
353368

354-
FROM {{ last_cte }}
355-
GROUP BY local.{{ multi_active_config['main_hashkey_column'] }}, {{ ldts_alias }}
369+
FROM main_hashkey_generation
370+
GROUP BY {{ multi_active_config['main_hashkey_column'] }}, {{ ldts_alias }}
371+
372+
373+
{% do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.main_hashkey_dict)) -%} {# Add main hashkey to list of processed columns, otherwise ghost records don't get created #}
374+
356375

357376
),
358377

359378
hashed_columns AS (
360379

361380
SELECT
362381

363-
{{ datavault4dbt.alias_all(columns=final_columns_to_select, prefix='main_hashkey_generation') }}, {# Everything from last_cte before hashed_columns. #}
382+
{{ datavault4dbt.alias_all(columns=final_columns_to_select, prefix='main_hashkey_generation') }}, {# Everything from last_cte before hashed_columns. #}
364383
{% set processed_remaining_hash_columns = datavault4dbt.process_hash_column_excludes(tmp_ns.remaining_hashed_columns) -%}
365384
{# Generates only all remaining hashkeys, that are no hashdiffs #}
366385

367386
{%- if datavault4dbt.is_something(processed_remaining_hash_columns) %}
368-
{{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }}, {# All remaining hashed_columns get calculated. #}
387+
{{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }},
388+
{%- do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.remaining_hashed_columns)) -%} {# All remaining hashed_columns get calculated. #}
369389
{% endif -%}
370390

391+
371392
{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(tmp_ns.hashdiff_names)) }}, {# All MA Hashdiffs are selected. #}
372-
main_hashkey_generation.{{ multi_active_config['main_hashkey_column'] }} {# Main Hashkey selected. #}
393+
main_hashkey_generation.{{ multi_active_config['main_hashkey_column'] }} {# Main Hashkey selected. #}
373394

374395
FROM main_hashkey_generation
375396
LEFT JOIN ma_hashdiff_prep
@@ -461,7 +482,7 @@ unknown_values AS (
461482
{%- if datavault4dbt.is_something(processed_hash_columns) -%},
462483

463484
{%- for hash_column in processed_hash_columns %}
464-
CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as "{{ hash_column }}"
485+
CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }}
465486
{%- if not loop.last %},{% endif %}
466487
{%- endfor -%}
467488

@@ -528,7 +549,7 @@ error_values AS (
528549
{%- if datavault4dbt.is_something(processed_hash_columns) -%},
529550

530551
{%- for hash_column in processed_hash_columns %}
531-
CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as "{{ hash_column }}"
552+
CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }}
532553
{%- if not loop.last %},{% endif %}
533554
{%- endfor -%}
534555

macros/staging/hash_columns.sql

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666

6767
{%- endmacro -%}
6868

69+
6970
{%- macro redshift__hash_columns(columns, multi_active_key, main_hashkey_column) -%}
7071
{%- if columns is mapping and columns is not none -%}
7172

@@ -89,33 +90,34 @@
8990
{%- endif -%}
9091

9192

92-
{%- else -%}
93-
93+
{%- else -%}
9494
{% if columns[col] is mapping and columns[col].is_hashdiff -%}
95-
{%- if columns[col].use_rtrim -%}
96-
{%- set rtrim_hashdiff = true -%}
97-
{%- else -%}
98-
{%- set rtrim_hashdiff = false -%}
99-
{%- endif -%}
95+
10096
{{- datavault4dbt.hash(columns=columns[col]['columns'],
10197
alias=col,
102-
is_hashdiff=columns[col]['is_hashdiff'],
103-
rtrim_hashdiff=rtrim_hashdiff) -}}
98+
is_hashdiff=columns[col]['is_hashdiff']) -}}
99+
104100
{%- elif columns[col] is not mapping -%}
101+
105102
{{- datavault4dbt.hash(columns=columns[col],
106103
alias=col,
107104
is_hashdiff=false) -}}
108105

109106
{%- elif columns[col] is mapping and not columns[col].is_hashdiff -%}
107+
110108
{%- if execute -%}
111109
{%- do exceptions.warn("[" ~ this ~ "] Warning: You provided a list of columns under a 'columns' key, but did not provide the 'is_hashdiff' flag. Use list syntax for PKs.") -%}
112110
{% endif %}
111+
113112
{{- datavault4dbt.hash(columns=columns[col]['columns'], alias=col) -}}
113+
114114
{%- endif -%}
115+
115116
{{- ",\n" if not loop.last -}}
117+
116118
{%- endif -%}
117119

118120
{%- endfor -%}
121+
119122
{%- endif %}
120123
{%- endmacro -%}
121-

0 commit comments

Comments
 (0)