Skip to content

Commit 69e9a8a

Browse files
author
rmoff
committed
working ingestion
1 parent a0b03da commit 69e9a8a

File tree

8 files changed

+590
-153
lines changed

8 files changed

+590
-153
lines changed

postgres-to-snowflake-with-cdc/README.adoc

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,143 @@ Once up, find out your Postgres server host/post that is available on the intern
2121
curl -s localhost:4040/api/tunnels | jq -r '.tunnels[0].public_url' | sed 's/tcp:\/\///g'
2222
----
2323

24+
== Steps
25+
26+
=== Configure Postgres tables for replication
27+
28+
[source,bash]
29+
----
30+
docker exec -it postgres psql -h localhost -U postgres -d postgres -f /data/replication-config.sql
31+
----
32+
33+
[source,sql]
34+
----
35+
ALTER TABLE customers REPLICA IDENTITY FULL;
36+
ALTER TABLE pets REPLICA IDENTITY FULL;
37+
ALTER TABLE appointments REPLICA IDENTITY FULL;
38+
----
39+
40+
Check their replica status; each should show `f`:
41+
42+
[source,sql]
43+
----
44+
SELECT relreplident FROM pg_class
45+
WHERE oid in ( 'customers'::regclass, 'pets'::regclass, 'appointments'::regclass);
46+
----
47+
48+
=== Add PKs to the Postgres tables
49+
50+
[source,bash]
51+
----
52+
docker exec -it postgres psql -h localhost -U postgres -d postgres -f /data/add-pk.sql
53+
----
54+
55+
56+
[source,sql]
57+
----
58+
alter table customers
59+
alter column customer_id set not null;
60+
alter table customers
61+
add constraint pk_customers primary key (customer_id);
62+
63+
alter table pets
64+
alter column pet_id set not null;
65+
alter table pets
66+
add constraint pk_pets primary key (pet_id);
67+
68+
alter table appointments
69+
alter column appointment_id set not null;
70+
alter table appointments
71+
add constraint pk_appointments primary key (appointment_id );
72+
----
73+
74+
75+
=== Store the password
76+
77+
[source,bash]
78+
----
79+
decodable apply decodable/pg-secret.yaml
80+
----
81+
82+
[source,yaml]
83+
----
84+
---
85+
kind: secret
86+
name: omd-pg
87+
id: ee94bd72
88+
result: updated
89+
• Wrote plaintext values for secret IDs: [ee94bd72]
90+
----
91+
92+
=== Generate resource definitions
93+
94+
[source,bash]
95+
----
96+
decodable connection scan \
97+
--name oh-my-dawg-pg \
98+
--connector postgres-cdc \
99+
--type source \
100+
--prop hostname=$(curl -s localhost:4040/api/tunnels | jq -r '.tunnels[0].public_url' | sed 's/tcp:\/\///g' | cut -d':' -f1) \
101+
--prop port=$(curl -s localhost:4040/api/tunnels | jq -r '.tunnels[0].public_url' | sed 's/tcp:\/\///g' | cut -d':' -f2) \
102+
--prop database-name=postgres \
103+
--prop username=postgres \
104+
--prop password=$(decodable query --name omd-pg --keep-ids | yq '.metadata.id') \
105+
--include-pattern schema-name=public \
106+
--output-resource-name-template stream-name="omd-{table-name}" \
107+
> omd-pg.yaml
108+
----
109+
110+
=== Edit resource definitions
111+
112+
113+
- set to active
114+
115+
[source,yaml]
116+
----
117+
spec_version: v2
118+
spec:
119+
execution:
120+
active: true
121+
----
122+
123+
- add tags
124+
125+
[source,yaml]
126+
----
127+
metadata:
128+
tags:
129+
project: "oh-my-dawg"
130+
author: "rmoff"
131+
----
132+
133+
=== Apply resource definitions
134+
135+
[source,bash]
136+
----
137+
decodable apply omd-pg.yaml
138+
----
139+
140+
[source,yaml]
141+
----
142+
---
143+
kind: connection
144+
name: oh-my-dawg-pg
145+
id: 6d02ba15
146+
result: created
147+
---
148+
kind: stream
149+
name: omd-appointments
150+
id: 975bba8d
151+
result: created
152+
---
153+
kind: stream
154+
name: omd-customers
155+
id: 7885b32e
156+
result: created
157+
---
158+
kind: stream
159+
name: omd-pets
160+
id: 3cc8e060
161+
result: created
162+
----
163+
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
---
2+
kind: connection
3+
metadata:
4+
name: oh-my-dawg-pg
5+
description: ""
6+
tags:
7+
project: "oh-my-dawg"
8+
author: "rmoff"
9+
spec_version: v2
10+
spec:
11+
execution:
12+
active: true
13+
connector: postgres-cdc
14+
properties:
15+
database-name: postgres
16+
hostname: 2.tcp.eu.ngrok.io
17+
password: omd-pg
18+
port: "19366"
19+
username: postgres
20+
stream_mappings:
21+
- stream_name: omd-appointments
22+
external_resource_specifier:
23+
database-name: postgres
24+
schema-name: public
25+
table-name: appointments
26+
- stream_name: omd-customers
27+
external_resource_specifier:
28+
database-name: postgres
29+
schema-name: public
30+
table-name: customers
31+
- stream_name: omd-pets
32+
external_resource_specifier:
33+
database-name: postgres
34+
schema-name: public
35+
table-name: pets
36+
type: source
37+
---
38+
kind: stream
39+
metadata:
40+
name: omd-appointments
41+
tags:
42+
project: "oh-my-dawg"
43+
author: "rmoff"
44+
spec_version: v1
45+
spec:
46+
schema_v2:
47+
constraints:
48+
primary_key:
49+
- appointment_id
50+
fields:
51+
- kind: physical
52+
name: appointment_id
53+
type: STRING NOT NULL
54+
- kind: physical
55+
name: duration_minutes
56+
type: BIGINT
57+
- kind: physical
58+
name: appointment_time
59+
type: STRING
60+
- kind: physical
61+
name: status
62+
type: STRING
63+
- kind: physical
64+
name: pet_id
65+
type: STRING
66+
- kind: physical
67+
name: customer_id
68+
type: STRING
69+
- kind: physical
70+
name: service_type
71+
type: STRING
72+
- kind: physical
73+
name: price
74+
type: DOUBLE
75+
- kind: physical
76+
name: last_updated
77+
type: STRING
78+
---
79+
kind: stream
80+
metadata:
81+
name: omd-customers
82+
tags:
83+
project: "oh-my-dawg"
84+
author: "rmoff"
85+
spec_version: v1
86+
spec:
87+
schema_v2:
88+
constraints:
89+
primary_key:
90+
- customer_id
91+
fields:
92+
- kind: physical
93+
name: customer_id
94+
type: STRING NOT NULL
95+
- kind: physical
96+
name: first_name
97+
type: STRING
98+
- kind: physical
99+
name: last_name
100+
type: STRING
101+
- kind: physical
102+
name: phone
103+
type: STRING
104+
- kind: physical
105+
name: email
106+
type: STRING
107+
- kind: physical
108+
name: registered_date
109+
type: STRING
110+
---
111+
kind: stream
112+
metadata:
113+
name: omd-pets
114+
tags:
115+
project: "oh-my-dawg"
116+
author: "rmoff"
117+
spec_version: v1
118+
spec:
119+
schema_v2:
120+
constraints:
121+
primary_key:
122+
- pet_id
123+
fields:
124+
- kind: physical
125+
name: pet_id
126+
type: STRING NOT NULL
127+
- kind: physical
128+
name: customer_id
129+
type: STRING
130+
- kind: physical
131+
name: pet_name
132+
type: STRING
133+
- kind: physical
134+
name: pet_type
135+
type: STRING
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
kind: secret
3+
metadata:
4+
name: omd-pg
5+
spec_version: v1
6+
spec:
7+
value_literal: Welcome123

postgres-to-snowflake-with-cdc/docker-compose.yml

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,17 @@ services:
1313
- 4040:4040 # Web dashboard for ngrok
1414

1515
postgres:
16-
image: postgres:latest
17-
ports:
18-
- 5432:5432
1916
container_name: postgres
17+
image: quay.io/debezium/example-postgres:2.3
18+
ports:
19+
- "5432:5432"
2020
environment:
21-
POSTGRES_PASSWORD: Welcome123
21+
- POSTGRES_DB=postgres
22+
- POSTGRES_USER=postgres
23+
- POSTGRES_PASSWORD=Welcome123
24+
volumes:
25+
- ./postgres/postgresql.conf.sample:/usr/share/postgresql/postgresql.conf.sample
26+
- ${PWD}/postgres:/data
2227

2328
shadowtraffic:
2429
# watch 'docker exec shadowtraffic curl -s localhost:9400/metrics |grep events_sent'
@@ -28,9 +33,10 @@ services:
2833
- shadowtraffic/license.env
2934
volumes:
3035
- ./shadowtraffic:/data
31-
command: --config /data/config.json --with-studio --sample 1000 --watch
36+
command: --config /data/config.json
37+
# --with-studio --sample 20 --watch
3238
ports:
33-
- 8080:8080
39+
# - 8080:8080
3440
- 9400:9400
3541

3642
networks:
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
alter table customers
2+
alter column customer_id set not null;
3+
alter table customers
4+
add constraint pk_customers primary key (customer_id);
5+
6+
alter table pets
7+
alter column pet_id set not null;
8+
alter table pets
9+
add constraint pk_pets primary key (pet_id);
10+
11+
alter table appointments
12+
alter column appointment_id set not null;
13+
alter table appointments
14+
add constraint pk_appointments primary key (appointment_id );
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# LOGGING
2+
# log_min_error_statement = fatal
3+
# log_min_messages = DEBUG1
4+
5+
# CONNECTION
6+
listen_addresses = '*'
7+
8+
# MODULES
9+
shared_preload_libraries = 'decoderbufs'
10+
11+
# REPLICATION
12+
wal_level = logical # minimal, archive, hot_standby, or logical (change requires restart)
13+
max_wal_senders = 10 # max number of walsender processes (change requires restart)
14+
#wal_keep_segments = 4 # in logfile segments, 16MB each; 0 disables
15+
#wal_sender_timeout = 60s # in milliseconds; 0 disables
16+
max_replication_slots = 10 # max number of replication slots (change requires restart)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ALTER TABLE customers REPLICA IDENTITY FULL;
2+
ALTER TABLE pets REPLICA IDENTITY FULL;
3+
ALTER TABLE appointments REPLICA IDENTITY FULL;

0 commit comments

Comments
 (0)