Skip to content

Commit c66fa35

Browse files
committed
Add Iceberg table property to set the max columns number for which metrics are collected
1 parent 3544e0e commit c66fa35

File tree

10 files changed

+116
-25
lines changed

10 files changed

+116
-25
lines changed

presto-docs/src/main/sphinx/connector/iceberg.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,9 @@ Property Name Description
320320

321321
``iceberg.metadata-delete-after-commit`` Set to ``true`` to delete the oldest metadata files after ``false``
322322
each commit.
323+
324+
``iceberg.metrics-max-inferred-column`` The maximum number of columns for which metrics ``100``
325+
are collected.
323326
======================================================= ============================================================= ============
324327

325328
Table Properties
@@ -367,6 +370,9 @@ Property Name Description
367370

368371
``metadata_delete_after_commit`` Set to ``true`` to delete the oldest metadata file after ``false``
369372
each commit.
373+
374+
``metrics_max_inferred_column`` Optionally specifies the maximum number of columns for which ``100``
375+
metrics are collected.
370376
======================================= =============================================================== ============
371377

372378
The table definition below specifies format ``ORC``, partitioning by columns ``c1`` and ``c2``,

presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@
122122
import static com.facebook.presto.iceberg.IcebergTableProperties.LOCATION_PROPERTY;
123123
import static com.facebook.presto.iceberg.IcebergTableProperties.METADATA_DELETE_AFTER_COMMIT;
124124
import static com.facebook.presto.iceberg.IcebergTableProperties.METADATA_PREVIOUS_VERSIONS_MAX;
125+
import static com.facebook.presto.iceberg.IcebergTableProperties.METRICS_MAX_INFERRED_COLUMN;
125126
import static com.facebook.presto.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY;
126127
import static com.facebook.presto.iceberg.IcebergTableType.CHANGELOG;
127128
import static com.facebook.presto.iceberg.IcebergTableType.DATA;
@@ -561,6 +562,7 @@ protected ImmutableMap<String, Object> createMetadataProperties(Table icebergTab
561562
properties.put(DELETE_MODE, IcebergUtil.getDeleteMode(icebergTable));
562563
properties.put(METADATA_PREVIOUS_VERSIONS_MAX, IcebergUtil.getMetadataPreviousVersionsMax(icebergTable));
563564
properties.put(METADATA_DELETE_AFTER_COMMIT, IcebergUtil.isMetadataDeleteAfterCommit(icebergTable));
565+
properties.put(METRICS_MAX_INFERRED_COLUMN, IcebergUtil.getMetricsMaxInferredColumn(icebergTable));
564566

565567
return properties.build();
566568
}

presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergConfig.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import static org.apache.iceberg.CatalogProperties.IO_MANIFEST_CACHE_MAX_TOTAL_BYTES_DEFAULT;
3939
import static org.apache.iceberg.TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT;
4040
import static org.apache.iceberg.TableProperties.METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT;
41+
import static org.apache.iceberg.TableProperties.METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT;
4142

4243
public class IcebergConfig
4344
{
@@ -57,6 +58,7 @@ public class IcebergConfig
5758
private int rowsForMetadataOptimizationThreshold = 1000;
5859
private int metadataPreviousVersionsMax = METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT;
5960
private boolean metadataDeleteAfterCommit = METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT;
61+
private int metricsMaxInferredColumn = METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT;
6062

6163
private EnumSet<ColumnStatisticType> hiveStatisticsMergeFlags = EnumSet.noneOf(ColumnStatisticType.class);
6264
private String fileIOImpl = HadoopFileIO.class.getName();
@@ -380,4 +382,17 @@ public IcebergConfig setMetadataDeleteAfterCommit(boolean metadataDeleteAfterCom
380382
this.metadataDeleteAfterCommit = metadataDeleteAfterCommit;
381383
return this;
382384
}
385+
386+
public int getMetricsMaxInferredColumn()
387+
{
388+
return metricsMaxInferredColumn;
389+
}
390+
391+
@Config("iceberg.metrics-max-inferred-column")
392+
@ConfigDescription("The maximum number of columns for which metrics are collected")
393+
public IcebergConfig setMetricsMaxInferredColumn(int metricsMaxInferredColumn)
394+
{
395+
this.metricsMaxInferredColumn = metricsMaxInferredColumn;
396+
return this;
397+
}
383398
}

presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergTableProperties.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public class IcebergTableProperties
4343
public static final String DELETE_MODE = "delete_mode";
4444
public static final String METADATA_PREVIOUS_VERSIONS_MAX = "metadata_previous_versions_max";
4545
public static final String METADATA_DELETE_AFTER_COMMIT = "metadata_delete_after_commit";
46+
public static final String METRICS_MAX_INFERRED_COLUMN = "metrics_max_inferred_column";
4647
private static final String DEFAULT_FORMAT_VERSION = "2";
4748

4849
private final List<PropertyMetadata<?>> tableProperties;
@@ -106,6 +107,11 @@ public IcebergTableProperties(IcebergConfig icebergConfig)
106107
"Whether enables to delete the oldest metadata file after commit",
107108
icebergConfig.isMetadataDeleteAfterCommit(),
108109
false))
110+
.add(integerProperty(
111+
METRICS_MAX_INFERRED_COLUMN,
112+
"The maximum number of columns for which metrics are collected",
113+
icebergConfig.getMetricsMaxInferredColumn(),
114+
false))
109115
.build();
110116

111117
columnProperties = ImmutableList.of(stringProperty(
@@ -166,4 +172,9 @@ public static Boolean isMetadataDeleteAfterCommit(Map<String, Object> tablePrope
166172
{
167173
return (Boolean) tableProperties.get(METADATA_DELETE_AFTER_COMMIT);
168174
}
175+
176+
public static Integer getMetricsMaxInferredColumn(Map<String, Object> tableProperties)
177+
{
178+
return (Integer) tableProperties.get(METRICS_MAX_INFERRED_COLUMN);
179+
}
169180
}

presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergUtil.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@
189189
import static org.apache.iceberg.TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT;
190190
import static org.apache.iceberg.TableProperties.METADATA_PREVIOUS_VERSIONS_MAX;
191191
import static org.apache.iceberg.TableProperties.METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT;
192+
import static org.apache.iceberg.TableProperties.METRICS_MAX_INFERRED_COLUMN_DEFAULTS;
193+
import static org.apache.iceberg.TableProperties.METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT;
192194
import static org.apache.iceberg.TableProperties.ORC_COMPRESSION;
193195
import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION;
194196
import static org.apache.iceberg.TableProperties.UPDATE_MODE;
@@ -1133,6 +1135,9 @@ public static Map<String, String> populateTableProperties(ConnectorTableMetadata
11331135

11341136
Boolean metadataDeleteAfterCommit = IcebergTableProperties.isMetadataDeleteAfterCommit(tableMetadata.getProperties());
11351137
propertiesBuilder.put(METADATA_DELETE_AFTER_COMMIT_ENABLED, String.valueOf(metadataDeleteAfterCommit));
1138+
1139+
Integer metricsMaxInferredColumn = IcebergTableProperties.getMetricsMaxInferredColumn(tableMetadata.getProperties());
1140+
propertiesBuilder.put(METRICS_MAX_INFERRED_COLUMN_DEFAULTS, String.valueOf(metricsMaxInferredColumn));
11361141
return propertiesBuilder.build();
11371142
}
11381143

@@ -1167,6 +1172,13 @@ public static boolean isMetadataDeleteAfterCommit(Table table)
11671172
String.valueOf(METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT)));
11681173
}
11691174

1175+
public static int getMetricsMaxInferredColumn(Table table)
1176+
{
1177+
return Integer.parseInt(table.properties()
1178+
.getOrDefault(METRICS_MAX_INFERRED_COLUMN_DEFAULTS,
1179+
String.valueOf(METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT)));
1180+
}
1181+
11701182
public static Optional<PartitionData> partitionDataFromJson(PartitionSpec spec, Optional<String> partitionDataAsJson)
11711183
{
11721184
org.apache.iceberg.types.Type[] partitionColumnTypes = spec.fields().stream()

presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedSmokeTestBase.java

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ public void testShowCreateTable()
142142
" format_version = '2',\n" +
143143
" location = '%s',\n" +
144144
" metadata_delete_after_commit = false,\n" +
145-
" metadata_previous_versions_max = 100\n" +
145+
" metadata_previous_versions_max = 100,\n" +
146+
" metrics_max_inferred_column = 100\n" +
146147
")", getLocation("tpch", "orders")));
147148
}
148149

@@ -422,6 +423,7 @@ private void testCreatePartitionedTableAs(Session session, FileFormat fileFormat
422423
" location = '%s',\n" +
423424
" metadata_delete_after_commit = false,\n" +
424425
" metadata_previous_versions_max = 100,\n" +
426+
" metrics_max_inferred_column = 100,\n" +
425427
" partitioning = ARRAY['order_status','ship_priority','bucket(order_key, 9)']\n" +
426428
")",
427429
getSession().getCatalog().get(),
@@ -623,7 +625,8 @@ public void testTableComments()
623625
" format_version = '2',\n" +
624626
" location = '%s',\n" +
625627
" metadata_delete_after_commit = false,\n" +
626-
" metadata_previous_versions_max = 100\n" +
628+
" metadata_previous_versions_max = 100,\n" +
629+
" metrics_max_inferred_column = 100\n" +
627630
")";
628631
String createTableSql = format(createTableTemplate, "test table comment", getLocation("tpch", "test_table_comments"));
629632

@@ -714,6 +717,7 @@ private void testCreateTableLike()
714717
" location = '%s',\n" +
715718
" metadata_delete_after_commit = false,\n" +
716719
" metadata_previous_versions_max = 100,\n" +
720+
" metrics_max_inferred_column = 100,\n" +
717721
" partitioning = ARRAY['adate']\n" +
718722
")", getLocation("tpch", "test_create_table_like_original")));
719723

@@ -729,7 +733,8 @@ private void testCreateTableLike()
729733
" format_version = '2',\n" +
730734
" location = '%s',\n" +
731735
" metadata_delete_after_commit = false,\n" +
732-
" metadata_previous_versions_max = 100\n" +
736+
" metadata_previous_versions_max = 100,\n" +
737+
" metrics_max_inferred_column = 100\n" +
733738
")", getLocation("tpch", "test_create_table_like_copy1")));
734739
dropTable(session, "test_create_table_like_copy1");
735740

@@ -740,7 +745,8 @@ private void testCreateTableLike()
740745
" format_version = '2',\n" +
741746
" location = '%s',\n" +
742747
" metadata_delete_after_commit = false,\n" +
743-
" metadata_previous_versions_max = 100\n" +
748+
" metadata_previous_versions_max = 100,\n" +
749+
" metrics_max_inferred_column = 100\n" +
744750
")", getLocation("tpch", "test_create_table_like_copy2")));
745751
dropTable(session, "test_create_table_like_copy2");
746752

@@ -752,6 +758,7 @@ private void testCreateTableLike()
752758
" location = '%s',\n" +
753759
" metadata_delete_after_commit = false,\n" +
754760
" metadata_previous_versions_max = 100,\n" +
761+
" metrics_max_inferred_column = 100,\n" +
755762
" partitioning = ARRAY['adate']\n" +
756763
")", catalogType.equals(CatalogType.HIVE) ?
757764
getLocation("tpch", "test_create_table_like_original") :
@@ -766,6 +773,7 @@ private void testCreateTableLike()
766773
" location = '%s',\n" +
767774
" metadata_delete_after_commit = false,\n" +
768775
" metadata_previous_versions_max = 100,\n" +
776+
" metrics_max_inferred_column = 100,\n" +
769777
" partitioning = ARRAY['adate']\n" +
770778
")", catalogType.equals(CatalogType.HIVE) ?
771779
getLocation("tpch", "test_create_table_like_original") :
@@ -809,7 +817,8 @@ protected void testCreateTableWithFormatVersion(String formatVersion, String def
809817
" format_version = '%s',\n" +
810818
" location = '%s',\n" +
811819
" metadata_delete_after_commit = false,\n" +
812-
" metadata_previous_versions_max = 100\n" +
820+
" metadata_previous_versions_max = 100,\n" +
821+
" metrics_max_inferred_column = 100\n" +
813822
")",
814823
getSession().getCatalog().get(),
815824
getSession().getSchema().get(),

presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import com.facebook.presto.testing.QueryRunner;
4646
import com.facebook.presto.tests.AbstractTestQueryFramework;
4747
import com.google.common.base.Joiner;
48+
import com.google.common.base.Strings;
4849
import com.google.common.collect.ImmutableList;
4950
import com.google.common.collect.ImmutableMap;
5051
import com.google.common.collect.ImmutableSet;
@@ -638,17 +639,24 @@ public void testPartitionedByVarbinaryType()
638639
assertQuerySucceeds("drop table test_partition_columns_varbinary");
639640
}
640641

641-
@Test
642-
public void testReadWriteStatsWithColumnLimits()
642+
@DataProvider(name = "columnCount")
643+
public Object[][] getColumnCount()
644+
{
645+
return new Object[][] {{2}, {16}, {100}};
646+
}
647+
648+
@Test(dataProvider = "columnCount")
649+
public void testReadWriteStatsWithColumnLimits(int columnCount)
643650
{
644-
// The columns number of an Iceberg table for which metrics are collected is 100 by default
645-
int columnCount = 100;
646651
try {
647652
String columns = Joiner.on(", ")
648653
.join(IntStream.iterate(2, i -> i + 1).limit(columnCount - 2)
649654
.mapToObj(idx -> "column_" + idx + " int")
650655
.iterator());
651-
assertUpdate("CREATE TABLE test_stats_with_column_limits (column_0 int, column_1 varchar, " + columns + ", column_10001 varchar)");
656+
String comma = Strings.isNullOrEmpty(columns.trim()) ? "" : ", ";
657+
658+
// The columns number of `test_stats_with_column_limits` for which metrics are collected is set to `columnCount`
659+
assertUpdate("CREATE TABLE test_stats_with_column_limits (column_0 int, column_1 varchar, " + columns + comma + "column_10001 varchar) with(metrics_max_inferred_column = " + columnCount + ")");
652660
assertTrue(getQueryRunner().tableExists(getSession(), "test_stats_with_column_limits"));
653661
List<String> columnNames = IntStream.iterate(0, i -> i + 1).limit(columnCount)
654662
.mapToObj(idx -> "column_" + idx).collect(Collectors.toList());
@@ -675,7 +683,10 @@ public void testReadWriteStatsWithColumnLimits()
675683
.mapToObj(idx -> "300" + idx)
676684
.iterator());
677685
// test after simple insert we get a good estimate
678-
assertUpdate("INSERT INTO test_stats_with_column_limits VALUES (1, '1001', " + values1 + ", 'abc'), (2, '2001', " + values2 + ", 'xyz'), (3, '3001', " + values3 + ", 'lmnopqrst')", 3);
686+
assertUpdate("INSERT INTO test_stats_with_column_limits VALUES " +
687+
"(1, '1001', " + values1 + comma + "'abc'), " +
688+
"(2, '2001', " + values2 + comma + "'xyz'), " +
689+
"(3, '3001', " + values3 + comma + "'lmnopqrst')", 3);
679690
getQueryRunner().execute("ANALYZE test_stats_with_column_limits");
680691
stats = getTableStats("test_stats_with_column_limits");
681692
columnStats = remapper.apply(stats.getColumnStatistics());

presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergConfig.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import static org.apache.iceberg.CatalogProperties.IO_MANIFEST_CACHE_MAX_TOTAL_BYTES_DEFAULT;
3636
import static org.apache.iceberg.TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT;
3737
import static org.apache.iceberg.TableProperties.METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT;
38+
import static org.apache.iceberg.TableProperties.METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT;
3839

3940
public class TestIcebergConfig
4041
{
@@ -64,7 +65,8 @@ public void testDefaults()
6465
.setManifestCacheMaxContentLength(IO_MANIFEST_CACHE_MAX_CONTENT_LENGTH_DEFAULT)
6566
.setSplitManagerThreads(Runtime.getRuntime().availableProcessors())
6667
.setMetadataPreviousVersionsMax(METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT)
67-
.setMetadataDeleteAfterCommit(METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT));
68+
.setMetadataDeleteAfterCommit(METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT)
69+
.setMetricsMaxInferredColumn(METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT));
6870
}
6971

7072
@Test
@@ -94,6 +96,7 @@ public void testExplicitPropertyMappings()
9496
.put("iceberg.split-manager-threads", "42")
9597
.put("iceberg.metadata-previous-versions-max", "1")
9698
.put("iceberg.metadata-delete-after-commit", "true")
99+
.put("iceberg.metrics-max-inferred-column", "16")
97100
.build();
98101

99102
IcebergConfig expected = new IcebergConfig()
@@ -119,7 +122,8 @@ public void testExplicitPropertyMappings()
119122
.setManifestCacheMaxContentLength(10485760)
120123
.setSplitManagerThreads(42)
121124
.setMetadataPreviousVersionsMax(1)
122-
.setMetadataDeleteAfterCommit(true);
125+
.setMetadataDeleteAfterCommit(true)
126+
.setMetricsMaxInferredColumn(16);
123127

124128
assertFullMapping(properties, expected);
125129
}

0 commit comments

Comments
 (0)