Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.plan.RelOptTable;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelCollation;
Expand Down Expand Up @@ -102,8 +101,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {

private static final ThreadLocal<ColumnAccessInfo> COLUMN_ACCESS_INFO =
new ThreadLocal<>();
private static final ThreadLocal<Map<HiveProject, Table>> VIEW_PROJECT_TO_TABLE_SCHEMA =
new ThreadLocal<>();
private static final ThreadLocal<Map<RelNode, Table>> VIEW_RELNODE_TO_TABLE = new ThreadLocal<>();


protected HiveRelFieldTrimmer(boolean fetchStats) {
Expand Down Expand Up @@ -155,17 +153,18 @@ public RelNode trim(RelBuilder relBuilder, RelNode root) {
}

public RelNode trim(RelBuilder relBuilder, RelNode root,
ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) {
ColumnAccessInfo columnAccessInfo,
Map<RelNode, Table> relNodeToTable) {
try {
// Set local thread variables
COLUMN_ACCESS_INFO.set(columnAccessInfo);
VIEW_PROJECT_TO_TABLE_SCHEMA.set(viewToTableSchema);
VIEW_RELNODE_TO_TABLE.set(relNodeToTable);
// Execute pruning
return super.trim(relBuilder, root);
} finally {
// Always remove the local thread variables to avoid leaks
COLUMN_ACCESS_INFO.remove();
VIEW_PROJECT_TO_TABLE_SCHEMA.remove();
VIEW_RELNODE_TO_TABLE.remove();
}
}

Expand Down Expand Up @@ -203,6 +202,25 @@ protected RexNode handle(RexFieldAccess fieldAccess) {
return dispatchTrimFields(input, fieldsUsedBuilder.build(), extraFields);
}

@Override
protected void setColumnAccessInfoForViews(RelNode rel, ImmutableBitSet fieldsUsed) {
  final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
  final Map<RelNode, Table> relNodeToTable = VIEW_RELNODE_TO_TABLE.get();

  // Nothing to record unless the trim was started via the overload that
  // installed both thread-locals (see trim(..., columnAccessInfo, relNodeToTable)).
  if (columnAccessInfo == null || relNodeToTable == null) {
    return;
  }
  // HiveTableScans are handled separately in HiveTableScan's trimFields method.
  if (rel instanceof HiveTableScan) {
    return;
  }
  // Single lookup instead of containsKey + get; null means rel is not a view root.
  final Table table = relNodeToTable.get(rel);
  if (table == null) {
    return;
  }
  final List<FieldSchema> tableAllCols = table.getAllCols();
  // Record every referenced view column so column-level authorization / stats
  // collection sees exactly the pruned column set.
  for (int i = fieldsUsed.nextSetBit(0); i >= 0; i = fieldsUsed.nextSetBit(i + 1)) {
    columnAccessInfo.add(table.getCompleteName(), tableAllCols.get(i).getName());
  }
}

/**
* Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
* {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin}.
Expand Down Expand Up @@ -726,27 +744,6 @@ public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Se
return result(relBuilder.build(), mapping);
}

/**
 * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
 * {@link org.apache.calcite.rel.logical.LogicalProject}.
 *
 * <p>Before delegating to the parent implementation, records which view
 * columns are actually referenced so that column-level authorization can be
 * enforced for views whose plan root is this Project.
 */
public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed,
    Set<RelDataTypeField> extraFields) {
  // set columnAccessInfo for ViewColumnAuthorization
  final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
  final Map<HiveProject, Table> viewProjectToTableSchema = VIEW_PROJECT_TO_TABLE_SCHEMA.get();
  if (columnAccessInfo != null && viewProjectToTableSchema != null
      && viewProjectToTableSchema.containsKey(project)) {
    // Hoisted out of the loop: the table and its column list are invariant
    // across the project's expressions.
    final Table tab = viewProjectToTableSchema.get(project);
    final List<FieldSchema> allCols = tab.getAllCols();
    for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
      if (fieldsUsed.get(ord.i)) {
        columnAccessInfo.add(tab.getCompleteName(), allCols.get(ord.i).getName());
      }
    }
  }
  return super.trimFields(project, fieldsUsed, extraFields);
}

public TrimResult trimFields(HiveTableScan tableAccessRel, ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ protected final TrimResult dispatchTrimFields(
RelNode rel,
ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
setColumnAccessInfoForViews(rel, fieldsUsed);
final TrimResult trimResult =
trimFieldsDispatcher.invoke(rel, fieldsUsed, extraFields);
final RelNode newRel = trimResult.left;
Expand Down Expand Up @@ -1239,6 +1240,17 @@ public TrimResult trimFields(
return result(newTableAccessRel, mapping);
}

/**
 * Hook invoked from {@link #dispatchTrimFields} before each node is trimmed,
 * allowing subclasses to record which view columns are accessed (e.g. for
 * column-level authorization).
 *
 * <p>The default implementation is a no-op. This method is called
 * unconditionally for every dispatched node, so throwing
 * {@code UnsupportedOperationException} here would break any trimmer that
 * does not override it; subclasses that need the behavior (such as
 * HiveRelFieldTrimmer) override this method.
 *
 * @param rel RelNode being trimmed
 * @param fieldsUsed Fields used by the consumer of this node
 */
protected void setColumnAccessInfoForViews(RelNode rel, ImmutableBitSet fieldsUsed) {
  // Intentionally empty; overridden in subclasses that track view column access.
}

//~ Inner Classes ----------------------------------------------------------

/**
Expand Down
22 changes: 11 additions & 11 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -1577,7 +1577,7 @@ public class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> {
private final Map<String, PrunedPartitionList> partitionCache;
private final Map<String, ColumnStatsList> colStatsCache;
private final ColumnAccessInfo columnAccessInfo;
private Map<HiveProject, Table> viewProjectToTableSchema;
private Map<RelNode, Table> relNodeToTable;
private final QB rootQB;

// correlated vars across subqueries within same query needs to have different ID
Expand Down Expand Up @@ -1662,8 +1662,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
// We need to get the ColumnAccessInfo and the RelNode-to-Table mapping for views.
if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_SCANCOLS) || !skipAuthorization()) {
HiveRelFieldTrimmer.get()
.trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null), calcitePlan, this.columnAccessInfo,
this.viewProjectToTableSchema);
.trim(
HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
calcitePlan,
this.columnAccessInfo,
this.relNodeToTable
);
}
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.MV_REWRITE_FIELD_TRIMMER);

Expand Down Expand Up @@ -4918,15 +4922,10 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,

aliasToRel.put(subqAlias, relNode);
if (qb.getViewToTabSchema().containsKey(subqAlias)) {
if (relNode instanceof HiveProject) {
if (this.viewProjectToTableSchema == null) {
this.viewProjectToTableSchema = new LinkedHashMap<>();
}
viewProjectToTableSchema.put((HiveProject) relNode, qb.getViewToTabSchema().get(subqAlias));
} else {
throw new SemanticException("View " + subqAlias + " is corresponding to "
+ relNode.toString() + ", rather than a HiveProject.");
if (this.relNodeToTable == null) {
this.relNodeToTable = new HashMap<>();
}
relNodeToTable.put(relNode, qb.getViewToTabSchema().get(subqAlias));
}
}

Expand Down Expand Up @@ -5048,6 +5047,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
return srcRel;
}


private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
RelNode gbFilter = null;
QBParseInfo qbp = getQBParseInfo(qb);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- Regression test: field trimming must record view column access for views
-- whose CBO plan root is not a Project (e.g. a Sort/Limit), so that
-- authorization of view columns does not fail.

-- Enable authorization so view column access checks are exercised.
set hive.security.authorization.enabled=true;
create table t1 (username string, id int);

-- View whose plan root is a Sort/Limit (distinct + limit), not a Project.
create view vw_t1 as select distinct username from t1 limit 5;
explain cbo select * from vw_t1;
select * from vw_t1;

-- View ordering by a subquery column (id) that is pruned from the outer select.
create view vw_t2 as
select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id;
explain cbo select * from vw_t2;
select * from vw_t2;

-- View with nested limits so the outermost operator is again a Sort/Limit.
create view vw_t3 as
select username from (select username, id from t1 where id > 10 limit 10) x where username > 'a' limit 5;
explain cbo select * from vw_t3;
select * from vw_t3;
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
PREHOOK: query: create table t1 (username string, id int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t1
POSTHOOK: query: create table t1 (username string, id int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t1
PREHOOK: query: create view vw_t1 as select distinct username from t1 limit 5
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@vw_t1
POSTHOOK: query: create view vw_t1 as select distinct username from t1 limit 5
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@vw_t1
POSTHOOK: Lineage: vw_t1.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
PREHOOK: query: explain cbo select * from vw_t1
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t1
#### A masked pattern was here ####
POSTHOOK: query: explain cbo select * from vw_t1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t1
#### A masked pattern was here ####
CBO PLAN:
HiveSortLimit(fetch=[5])
HiveProject(username=[$0])
HiveAggregate(group=[{0}])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: select * from vw_t1
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t1
#### A masked pattern was here ####
POSTHOOK: query: select * from vw_t1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t1
#### A masked pattern was here ####
PREHOOK: query: create view vw_t2 as
select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@vw_t2
POSTHOOK: query: create view vw_t2 as
select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@vw_t2
POSTHOOK: Lineage: vw_t2.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
PREHOOK: query: explain cbo select * from vw_t2
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo select * from vw_t2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t2
#### A masked pattern was here ####
CBO PLAN:
HiveFilter(condition=[>($0, _UTF-16LE'a')])
HiveProject(username=[$0])
HiveSortLimit(fetch=[1])
HiveProject(username=[$0])
HiveFilter(condition=[>($1, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: select * from vw_t2
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t2
#### A masked pattern was here ####
POSTHOOK: query: select * from vw_t2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t2
#### A masked pattern was here ####
PREHOOK: query: create view vw_t3 as
select username from (select username, id from t1 where id > 10 limit 10) x where username > 'a' limit 5
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@vw_t3
POSTHOOK: query: create view vw_t3 as
select username from (select username, id from t1 where id > 10 limit 10) x where username > 'a' limit 5
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@vw_t3
POSTHOOK: Lineage: vw_t3.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
PREHOOK: query: explain cbo select * from vw_t3
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t3
#### A masked pattern was here ####
POSTHOOK: query: explain cbo select * from vw_t3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t3
#### A masked pattern was here ####
CBO PLAN:
HiveSortLimit(fetch=[5])
HiveProject(username=[$0])
HiveFilter(condition=[>($0, _UTF-16LE'a')])
HiveProject(username=[$0])
HiveSortLimit(fetch=[10])
HiveProject(username=[$0])
HiveFilter(condition=[>($1, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: select * from vw_t3
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t3
#### A masked pattern was here ####
POSTHOOK: query: select * from vw_t3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t3
#### A masked pattern was here ####