Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
2234128
HIVE-29413: Avoid code duplication by updating getPartCols method for…
ramitg254 Apr 8, 2026
81b7315
commit-2
ramitg254 Apr 9, 2026
06e220b
corrected bucket-map-join test
ramitg254 Apr 9, 2026
cec2d60
corrected update statements
ramitg254 Apr 10, 2026
36a24a6
corrected load, partition evolution tests
ramitg254 Apr 11, 2026
4a9da76
refractored
ramitg254 Apr 11, 2026
1083bf6
addressed sonar issues
ramitg254 Apr 12, 2026
202a28b
updated table api and usage
ramitg254 Apr 26, 2026
959b109
introduced index optimization
ramitg254 May 24, 2026
0227b5f
corrected implementation
ramitg254 May 24, 2026
f07adf7
updated describe implementation and outputs
ramitg254 May 24, 2026
d705641
updated api and test
ramitg254 May 25, 2026
ef7875c
updated update implementation
ramitg254 May 25, 2026
52cc1ee
updated partition pruning and query rewriting
ramitg254 May 25, 2026
25f39ae
changes related to metatable
ramitg254 May 25, 2026
58b2347
corrected alter and semantic analyzer implementation
ramitg254 May 26, 2026
ef70751
updated merge implementation and test output
ramitg254 May 26, 2026
0c2647d
updated ctas create and tests output
ramitg254 May 26, 2026
ad81dba
updated stats autogather and test output
ramitg254 May 26, 2026
c44c9ab
updated getPartitionKeys
ramitg254 May 27, 2026
dd39015
removed getStorageSchemaCols part-1
ramitg254 May 30, 2026
8377450
removed getStorageSchemaCols part-2
ramitg254 May 31, 2026
5c5a419
removed getStorageSchemaCols part-3
ramitg254 May 31, 2026
5efcba2
removed workaround
ramitg254 May 31, 2026
72fca70
addressed sonar issues
ramitg254 Jun 1, 2026
baa9a74
non part cols retrieval made lazy
ramitg254 Jun 1, 2026
a7ae8f1
reviewed required changes
ramitg254 Jun 9, 2026
7cd3d00
corrected partition.getCols for iceberg table
ramitg254 Jun 9, 2026
9d3231a
added wrapper for lineage
ramitg254 Jun 9, 2026
11e3261
reverted getPartitionKeys
ramitg254 Jun 9, 2026
517577a
refractor-1
ramitg254 Jun 10, 2026
24b251e
refractor-2
ramitg254 Jun 10, 2026
02302ed
correction for rebased iceberg view commit recently merged to master
ramitg254 Jun 11, 2026
a16de04
removed isTableTypeSet and merged partition column comments
ramitg254 Jun 11, 2026
d151eea
updated conflicts for rebase
ramitg254 Jun 11, 2026
2239677
reverted to user comment override and refractored
ramitg254 Jun 11, 2026
a3bdb12
moved helpers to MetaStoreUtils
ramitg254 Jun 12, 2026
4788072
moved to HiveTableUtil
ramitg254 Jun 18, 2026
93352d7
updated method name
ramitg254 Jun 19, 2026
bcc3a8e
refractor
ramitg254 Jun 22, 2026
f31785e
refractor-2
ramitg254 Jun 22, 2026
9666c28
refractored Table metadata and merge rewriter
ramitg254 Jun 25, 2026
1730301
refractor
ramitg254 Jun 29, 2026
7bf42ec
refractor
ramitg254 Jun 29, 2026
115c839
refractor
ramitg254 Jul 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.CreateTableRequest;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
Expand Down Expand Up @@ -117,16 +116,6 @@ public BaseHiveIcebergMetaHook(Configuration conf) {
this.conf = conf;
}

public static boolean isIcebergView(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (hmsTable == null ||
hmsTable.getParameters() == null ||
!TableType.VIRTUAL_VIEW.toString().equals(hmsTable.getTableType())) {
return false;
}
String storageHandler = hmsTable.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE);
return HiveMetaHook.HIVE_ICEBERG_STORAGE_HANDLER.equals(storageHandler);
}

@Override
public void preCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
CreateTableRequest request = new CreateTableRequest(hmsTable);
Expand All @@ -139,7 +128,7 @@ public void preCreateTable(CreateTableRequest request) {
if (hmsTable.isTemporary()) {
throw new UnsupportedOperationException("Creation of temporary iceberg tables is not supported.");
}
if (isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
preCreateIcebergView(request);
return;
}
Expand Down Expand Up @@ -522,7 +511,7 @@ protected void setWriteModeDefaults(Table icebergTbl, Map<String, String> newPro
public void postGetTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (hmsTable != null) {
try {
if (isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
IcebergViewSupport.enrichHmsTableFromIcebergView(hmsTable, conf);
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ public HiveIcebergMetaHook(Configuration conf) {

@Override
public void commitCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf);
Map<String, String> tblProps =
hmsTable.getParameters() == null ? Maps.newHashMap() : Maps.newHashMap(hmsTable.getParameters());
Expand Down Expand Up @@ -266,7 +266,7 @@ public void commitDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable,
@Override
public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context)
throws MetaException {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
currentAlterTableOp = null;
if (commitLock == null) {
commitLock = new NoLock();
Expand Down Expand Up @@ -494,7 +494,7 @@ public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable
if (commitLock == null) {
throw new IllegalStateException("Hive commit lock should already be set");
}
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf);
Map<String, String> tblProps =
hmsTable.getParameters() == null ? Maps.newHashMap() : Maps.newHashMap(hmsTable.getParameters());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ public boolean canProvidePartitionStatistics(org.apache.hadoop.hive.ql.metadata.
if (!getStatsSource().equals(HiveMetaHook.ICEBERG)) {
return false;
}
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return false;
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Expand Down Expand Up @@ -899,7 +899,7 @@ public boolean supportsPartitionTransform() {

@Override
public List<TransformSpec> getPartitionTransformSpec(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return Collections.emptyList();
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Expand All @@ -916,7 +916,7 @@ public List<TransformSpec> getPartitionTransformSpec(org.apache.hadoop.hive.ql.m
@Override
public Map<Integer, List<TransformSpec>> getPartitionTransformSpecs(
org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return Collections.emptyMap();
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Expand Down Expand Up @@ -1550,10 +1550,10 @@ public List<FieldSchema> acidSelectColumns(org.apache.hadoop.hive.ql.metadata.Ta
case DELETE ->
// TODO: make it configurable whether we want to include the table columns in the select query.
// It might make delete writes faster if we don't have to write out the row object
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getCols());
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getAllCols());
Comment thread
deniskuzZ marked this conversation as resolved.
case UPDATE -> shouldOverwrite(table, operation) ?
ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA :
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getCols());
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getAllCols());
case MERGE -> ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA;
default -> ImmutableList.of();
};
Expand Down Expand Up @@ -1584,7 +1584,7 @@ public boolean supportsSortColumns() {

@Override
public List<FieldSchema> sortColumns(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return Collections.emptyList();
}
TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
Expand Down Expand Up @@ -1989,8 +1989,9 @@ public void setTableParametersForCTLT(org.apache.hadoop.hive.ql.metadata.Table t
desc.setIsExternal(true);
}

// If source is Iceberg table set the schema and the partition spec
if (MetaStoreUtils.isIcebergTable(origParams)) {
// parameter table_type is set to "ICEBERG" in case of Iceberg tables
// set the schema and the partition spec accordingly
if (HiveTableUtil.isTableTypeSet(origParams)) {
tbl.getParameters()
.put(InputFormatConfig.TABLE_SCHEMA, origParams.get(InputFormatConfig.TABLE_SCHEMA));
tbl.getParameters()
Expand Down Expand Up @@ -2147,13 +2148,12 @@ public List<Partition> getPartitions(org.apache.hadoop.hive.ql.metadata.Table hm
}

public boolean isPartitioned(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
List<FieldSchema> partCols = hmsTable.getPartCols();
return partCols != null && !partCols.isEmpty();
}
if (!hmsTable.getTTable().isSetId()) {
if (hmsTable.getMetaTable() != null) {
return false;
}
if (!HiveTableUtil.isRegistered(hmsTable) || HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return !hmsTable.getPartitionKeys().isEmpty();
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Snapshot snapshot = IcebergTableUtil.getTableSnapshot(table, hmsTable);

Expand Down Expand Up @@ -2296,13 +2296,12 @@ public boolean canPerformMetadataDelete(org.apache.hadoop.hive.ql.metadata.Table

@Override
public List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
List<FieldSchema> partCols = hmsTable.getPartCols();
return partCols != null ? partCols : Collections.emptyList();
}
if (!hmsTable.getTTable().isSetId()) {
if (hmsTable.getMetaTable() != null) {
return Collections.emptyList();
}
if (!HiveTableUtil.isRegistered(hmsTable) || HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return hmsTable.getPartitionKeys();
Comment thread
deniskuzZ marked this conversation as resolved.
}
Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
return MetastoreUtil.getPartitionKeys(icebergTable, icebergTable.spec().specId());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
Expand Down Expand Up @@ -380,4 +382,23 @@ public static boolean isCtas(Properties properties) {
return Boolean.parseBoolean(properties.getProperty(hive_metastoreConstants.TABLE_IS_CTAS));
}

public static boolean isTableTypeSet(Map<String, String> params) {
Comment thread
deniskuzZ marked this conversation as resolved.
return params != null &&
HiveMetaHook.ICEBERG.equalsIgnoreCase(params.get(HiveMetaHook.TABLE_TYPE));
}

public static boolean isIcebergView(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (hmsTable == null ||
hmsTable.getParameters() == null ||
!TableType.VIRTUAL_VIEW.toString().equals(hmsTable.getTableType())) {
return false;
}
String storageHandler = hmsTable.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE);
return HiveMetaHook.HIVE_ICEBERG_STORAGE_HANDLER.equals(storageHandler);
}

public static boolean isRegistered(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
return hmsTable.getTTable().isSetId() && isTableTypeSet(hmsTable.getParameters());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -946,7 +946,8 @@ public void testCreateTableWithoutColumnComments() {
@Test
public void testCreatePartitionedTableWithColumnComments() {
TableIdentifier identifier = TableIdentifier.of("default", "partitioned_with_comment_table");
String[] expectedDoc = new String[] {"int column", "string column", null, "partition column", null};
String[] expectedDoc = new String[] {"int column", "string column", null, "partition column",
"Transform: identity"};
shell.executeStatement("CREATE EXTERNAL TABLE partitioned_with_comment_table (" +
"t_int INT COMMENT 'int column', " +
"t_string STRING COMMENT 'string column', " +
Expand All @@ -959,13 +960,18 @@ public void testCreatePartitionedTableWithColumnComments() {

List<Object[]> rows = shell.executeStatement("DESCRIBE default.partitioned_with_comment_table");
List<Types.NestedField> columns = icebergTable.schema().columns();
List<String> partitionColumns = List.of("t_string_3", "t_string_4");
// The partition transform information and partition information is 6 extra lines, and 4 more line for the columns
Assert.assertEquals(columns.size() + 10, rows.size());
for (int i = 0; i < columns.size(); i++) {
Types.NestedField field = columns.get(i);
Assert.assertArrayEquals(new Object[] {field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(),
field.doc() != null ? field.doc() : ""}, rows.get(i));
Assert.assertEquals(expectedDoc[i], field.doc());
String fieldDoc = field.doc();
if (fieldDoc == null && partitionColumns.contains(field.name())) {
fieldDoc = "Transform: identity";
}
Assert.assertArrayEquals(new Object[]{field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(),
fieldDoc != null ? fieldDoc : ""}, rows.get(i));
Assert.assertEquals(expectedDoc[i], fieldDoc);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_orc
# col_name data_type comment
a int
b string
c string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -452,8 +450,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet
# col_name data_type comment
a int
b string
c string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -727,8 +723,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_avro
# col_name data_type comment
a int
b string
c string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1063,9 +1057,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_orc_mixed
# col_name data_type comment
a int
b double
c int
d string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1509,9 +1500,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet_mixed
# col_name data_type comment
a int
b double
c int
d string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1955,9 +1943,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_avro_mixed
# col_name data_type comment
a int
b double
c int
d string

# Partition Information
# col_name data_type comment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_orc
# col_name data_type comment
a int
b string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -414,7 +413,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet
# col_name data_type comment
a int
b string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -768,7 +766,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet_int
# col_name data_type comment
a int
b int

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1122,7 +1119,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet_double
# col_name data_type comment
a int
b double

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1422,7 +1418,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_avro
# col_name data_type comment
a int
b string

# Partition Information
# col_name data_type comment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,21 @@ Stage-0
limit:-1
Stage-1
Map 1 vectorized
File Output Operator [FS_53]
Map Join Operator [MAPJOIN_52] (rows=2 width=530)
BucketMapJoin:true,Conds:SEL_51._col1, _col2=RS_49._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
File Output Operator [FS_23]
Map Join Operator [MAPJOIN_22] (rows=2 width=530)
BucketMapJoin:true,Conds:SEL_21._col1, _col2=RS_19._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
<-Map 2 [CUSTOM_EDGE] vectorized
MULTICAST [RS_49]
MULTICAST [RS_19]
PartitionCols:_col2, _col1
Select Operator [SEL_48] (rows=2 width=265)
Select Operator [SEL_18] (rows=2 width=265)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_47] (rows=2 width=265)
Filter Operator [FIL_17] (rows=2 width=265)
predicate:(id is not null and part is not null)
TableScan [TS_3] (rows=2 width=265)
default@tbl,tbl2,Tbl:COMPLETE,Col:COMPLETE,Output:["foid","part","id"]
<-Select Operator [SEL_51] (rows=2 width=265)
<-Select Operator [SEL_21] (rows=2 width=265)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_50] (rows=2 width=265)
Filter Operator [FIL_20] (rows=2 width=265)
predicate:(id is not null and part is not null)
TableScan [TS_0] (rows=2 width=265)
default@tbl,tbl,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:100,Grouping Partition Columns:["id","part"],Output:["foid","part","id"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ PREHOOK: Input: default@tbl_ice_puffin
POSTHOOK: query: desc formatted tbl_ice_puffin C
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_ice_puffin
col_name C
col_name c
data_type int
min 52
max 56
Expand All @@ -358,7 +358,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
comment
comment Transform: identity
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
PREHOOK: query: EXPLAIN select count(*) from src_ice t1 join tbl_ice_puffin t2 on (t1.a = t2.a)
PREHOOK: type: QUERY
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,6 @@ POSTHOOK: query: describe formatted tbl_ice
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_ice
# col_name data_type comment
a int
b string
c int

# Partition Information
Expand Down
Loading
Loading