Skip to content

Commit 091d814

Browse files
committed
minor changes
1 parent c3e288e commit 091d814

File tree

2 files changed

+20
-19
lines changed

2 files changed

+20
-19
lines changed

scripts/aws_glue.py

Lines changed: 17 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -22,54 +22,55 @@
2222
sc = SparkContext()
2323
glueContext = GlueContext(sc)
2424
spark = glueContext.spark_session
25+
base = "s3://polars-pdsh/scale-factor-100.0/200/"
2526

2627
# Read arbitrary dataset from S3
2728
lineitem = glueContext.create_dynamic_frame.from_options(
2829
connection_type="s3",
29-
connection_options={"paths": ["s3://polars-pdsh/scale-factor-100.0/200/lineitem/"]},
30-
format="parquet", # Change to "json", "csv", etc.
30+
connection_options={"paths": [base + "lineitem/"]},
31+
format="parquet" # Change to "json", "csv", etc.
3132
).toDF()
3233

3334
customer = glueContext.create_dynamic_frame.from_options(
3435
connection_type="s3",
35-
connection_options={"paths": ["s3://polars-pdsh/scale-factor-100.0/200/customer/"]},
36-
format="parquet", # Change to "json", "csv", etc.
36+
connection_options={"paths": [base + "customer/"]},
37+
format="parquet" # Change to "json", "csv", etc.
3738
).toDF()
3839

3940
nation = glueContext.create_dynamic_frame.from_options(
4041
connection_type="s3",
41-
connection_options={"paths": ["s3://polars-pdsh/scale-factor-100.0/200/nation/"]},
42-
format="parquet", # Change to "json", "csv", etc.
42+
connection_options={"paths": [base + "nation/"]},
43+
format="parquet" # Change to "json", "csv", etc.
4344
).toDF()
4445

4546
orders = glueContext.create_dynamic_frame.from_options(
4647
connection_type="s3",
47-
connection_options={"paths": ["s3://polars-pdsh/scale-factor-100.0/200/orders/"]},
48-
format="parquet", # Change to "json", "csv", etc.
48+
connection_options={"paths": [base + "orders/"]},
49+
format="parquet" # Change to "json", "csv", etc.
4950
).toDF()
5051

5152
part = glueContext.create_dynamic_frame.from_options(
5253
connection_type="s3",
53-
connection_options={"paths": ["s3://polars-pdsh/scale-factor-100.0/200/part/"]},
54-
format="parquet", # Change to "json", "csv", etc.
54+
connection_options={"paths": [base + "part/"]},
55+
format="parquet" # Change to "json", "csv", etc.
5556
).toDF()
5657

5758
partsupp = glueContext.create_dynamic_frame.from_options(
5859
connection_type="s3",
59-
connection_options={"paths": ["s3://polars-pdsh/scale-factor-100.0/200/partsupp/"]},
60-
format="parquet", # Change to "json", "csv", etc.
60+
connection_options={"paths": [base + "partsupp/"]},
61+
format="parquet" # Change to "json", "csv", etc.
6162
).toDF()
6263

6364
region = glueContext.create_dynamic_frame.from_options(
6465
connection_type="s3",
65-
connection_options={"paths": ["s3://polars-pdsh/scale-factor-100.0/200/region/"]},
66-
format="parquet", # Change to "json", "csv", etc.
66+
connection_options={"paths": [base + "region/"]},
67+
format="parquet" # Change to "json", "csv", etc.
6768
).toDF()
6869

6970
supplier = glueContext.create_dynamic_frame.from_options(
7071
connection_type="s3",
71-
connection_options={"paths": ["s3://polars-pdsh/scale-factor-100.0/200/supplier/"]},
72-
format="parquet", # Change to "json", "csv", etc.
72+
connection_options={"paths": [base + "supplier/"]},
73+
format="parquet" # Change to "json", "csv", etc.
7374
).toDF()
7475

7576

scripts/pl_cloud.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from queries.polars.q21 import q as q21
2727
from queries.polars.q22 import q as q22
2828

29-
pc.login()
29+
pc.authenticate()
3030

3131

3232
def _scan_ds(table_name: str) -> pl.LazyFrame:
@@ -81,8 +81,8 @@ def _scan_ds(table_name: str) -> pl.LazyFrame:
8181

8282

8383
ctx = pc.ComputeContext(
84-
workspace="ritchie-workspace",
85-
instance_type="t3.xlarge",
84+
workspace="polars-ritchie-dev",
85+
instance_type="m6.xlarge",
8686
cluster_size=10,
8787
interactive=True,
8888
)

0 commit comments

Comments
 (0)