|
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session

# Common S3 prefix; each table is read from its own sub-folder beneath it.
base = "s3://polars-pdsh/scale-factor-100.0/200/"


def _read_table(name, fmt="parquet"):
    """Read the S3 folder ``base + name + "/"`` and return it as a DataFrame.

    Args:
        name: Sub-folder under ``base`` to read (e.g. ``"lineitem"``).
        fmt: Glue format name passed as ``format=``. Defaults to
            ``"parquet"``; change to ``"json"``, ``"csv"``, etc. as needed.

    Returns:
        The result of calling ``.toDF()`` on the DynamicFrame that Glue
        creates for the folder.
    """
    dynamic_frame = glueContext.create_dynamic_frame.from_options(
        connection_type="s3",
        connection_options={"paths": [base + name + "/"]},
        format=fmt,
    )
    return dynamic_frame.toDF()


# Read every dataset from S3. The module-level names are kept one-per-table
# so that code further down the script can keep referring to them directly.
lineitem = _read_table("lineitem")
customer = _read_table("customer")
nation = _read_table("nation")
orders = _read_table("orders")
part = _read_table("part")
partsupp = _read_table("partsupp")
region = _read_table("region")
supplier = _read_table("supplier")
|
74 | 75 |
|
75 | 76 |
|
|
0 commit comments