Skip to content

Commit ce39f74

Browse files
committed
Fix linting
1 parent a0bb22f commit ce39f74

File tree

1 file changed

+15
-4
lines changed

1 file changed

+15
-4
lines changed

scripts/prepare_data.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,9 @@ def pipelined_data_generation(
     base_path.mkdir(parents=True, exist_ok=True)

     num_dbgen_partitions = num_batches * parallelism
-    for batch_idx, part_indices in enumerate(batch(range(1, num_dbgen_partitions + 1), n=parallelism)):
+    for batch_idx, part_indices in enumerate(
+        batch(range(1, num_dbgen_partitions + 1), n=parallelism)
+    ):
         logger.info("Partition %s: Generating CSV files", part_indices)
         with Pool(parallelism) as process_pool:
             process_pool.starmap(
@@ -209,13 +211,18 @@ def gen_parquet(
     lf = lf.select(columns)

     if partitioned:
-        def partition_file_name(ctx):
+
+        def partition_file_name(ctx) -> pathlib.Path:
             partition = f"{batch_idx}_{ctx.file_idx}"
             (base_path / table_name / partition).mkdir(parents=True, exist_ok=True)  # noqa: B023
             return pathlib.Path(partition) / "part.parquet"

         path = base_path / table_name
-        lf.sink_parquet(pl.PartitionMaxSize(path, file_path=partition_file_name, max_size=rows_per_file))
+        lf.sink_parquet(
+            pl.PartitionMaxSize(
+                path, file_path=partition_file_name, max_size=rows_per_file
+            )
+        )
     else:
         path = base_path / f"{table_name}.parquet"
         lf.sink_parquet(path)
@@ -241,7 +248,11 @@ def partition_file_name(ctx):
         type=int,
     )
     parser.add_argument(
-        "--num-batches", default=None, help="Number of batches used to generate the data", type=int, nargs="?"
+        "--num-batches",
+        default=None,
+        help="Number of batches used to generate the data",
+        type=int,
+        nargs="?",
     )
     parser.add_argument(
         "--aws-s3-sync-location",

0 commit comments

Comments
 (0)