Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.plan.RelOptTable;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelCollation;
Expand Down Expand Up @@ -102,8 +101,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {

// Per-thread authorization context, populated by trim(...) below and consumed by the
// trimFields/preTrim callbacks, which cannot take extra parameters.
private static final ThreadLocal<ColumnAccessInfo> COLUMN_ACCESS_INFO =
    new ThreadLocal<>();
// Maps a RelNode generated for a view to the view's underlying Table so that column
// accesses can be recorded against the table during field trimming.
private static final ThreadLocal<Map<RelNode, Table>> REL_TO_TABLE = new ThreadLocal<>();


protected HiveRelFieldTrimmer(boolean fetchStats) {
Expand Down Expand Up @@ -155,17 +153,18 @@ public RelNode trim(RelBuilder relBuilder, RelNode root) {
}

public RelNode trim(RelBuilder relBuilder, RelNode root,
ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) {
ColumnAccessInfo columnAccessInfo,
Map<RelNode, Table> relNodeToTable) {
try {
// Set local thread variables
COLUMN_ACCESS_INFO.set(columnAccessInfo);
VIEW_PROJECT_TO_TABLE_SCHEMA.set(viewToTableSchema);
REL_TO_TABLE.set(relNodeToTable);
// Execute pruning
return super.trim(relBuilder, root);
} finally {
// Always remove the local thread variables to avoid leaks
COLUMN_ACCESS_INFO.remove();
VIEW_PROJECT_TO_TABLE_SCHEMA.remove();
REL_TO_TABLE.remove();
}
}

Expand Down Expand Up @@ -203,6 +202,30 @@ protected RexNode handle(RexFieldAccess fieldAccess) {
return dispatchTrimFields(input, fieldsUsedBuilder.build(), extraFields);
}

/**
 * Hook invoked before each node's fields are trimmed. Used here to record
 * view column accesses for authorization; see
 * {@code setColumnAccessInfoForViews}.
 *
 * @param rel node about to be trimmed
 * @param fieldsUsed fields of {@code rel} that will be kept
 */
@Override
protected void preTrim(RelNode rel, ImmutableBitSet fieldsUsed) {
setColumnAccessInfoForViews(rel, fieldsUsed);
}

/**
 * Records, into the thread-local {@link ColumnAccessInfo}, the columns of a view's
 * underlying table that are actually used by {@code rel}. No-op unless the
 * thread-local context was installed by
 * {@code trim(RelBuilder, RelNode, ColumnAccessInfo, Map)} and {@code rel} is a
 * view node registered in the rel-to-table map.
 *
 * @param rel node about to be trimmed
 * @param fieldsUsed ordinals of the fields of {@code rel} that are used; assumed to
 *        line up with the table's column list — TODO confirm for all view shapes
 */
protected void setColumnAccessInfoForViews(RelNode rel, ImmutableBitSet fieldsUsed) {
  final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
  final Map<RelNode, Table> relToTable = REL_TO_TABLE.get();

  // HiveTableScans are handled separately in HiveTableScan's trimFields method.
  if (rel instanceof HiveTableScan || columnAccessInfo == null || relToTable == null) {
    return;
  }
  // Single lookup instead of containsKey(rel) followed by get(rel).
  final Table table = relToTable.get(rel);
  if (table == null) {
    return;
  }
  final String tableName = table.getCompleteName();
  final List<FieldSchema> tableAllCols = table.getAllCols();
  for (int i : fieldsUsed) {
    columnAccessInfo.add(tableName, tableAllCols.get(i).getName());
  }
}

/**
* Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
* {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin}.
Expand Down Expand Up @@ -726,27 +749,6 @@ public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Se
return result(relBuilder.build(), mapping);
}

/**
 * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
 * {@link org.apache.calcite.rel.logical.LogicalProject}.
 *
 * Before delegating to the parent implementation, records view column
 * accesses: when this Project was generated for a view (it appears in the
 * thread-local view-to-table map), each of its projected expressions that
 * survives trimming is registered against the view's underlying table.
 */
public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
// set columnAccessInfo for ViewColumnAuthorization
final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
final Map<HiveProject, Table> viewProjectToTableSchema = VIEW_PROJECT_TO_TABLE_SCHEMA.get();
if (columnAccessInfo != null && viewProjectToTableSchema != null
&& viewProjectToTableSchema.containsKey(project)) {
// Ord.zip pairs each projected expression with its ordinal; that ordinal is
// assumed to index the table's full column list — TODO confirm alignment.
for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
if (fieldsUsed.get(ord.i)) {
Table tab = viewProjectToTableSchema.get(project);
columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName());
}
}
}
// Delegate the actual field pruning to the parent implementation.
return super.trimFields(project, fieldsUsed, extraFields);
}

public TrimResult trimFields(HiveTableScan tableAccessRel, ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ protected final TrimResult dispatchTrimFields(
RelNode rel,
ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
preTrim(rel, fieldsUsed);
final TrimResult trimResult =
trimFieldsDispatcher.invoke(rel, fieldsUsed, extraFields);
final RelNode newRel = trimResult.left;
Expand Down Expand Up @@ -1239,6 +1240,14 @@ public TrimResult trimFields(
return result(newTableAccessRel, mapping);
}

/**
 * Hook that runs before trimming columns from a relational expression.
 * The default implementation is a no-op; subclasses may override it to
 * observe a node and its used fields before pruning happens.
 *
 * @param rel RelNode
 * @param fieldsUsed Fields used
 */
protected void preTrim(RelNode rel, ImmutableBitSet fieldsUsed) {}

//~ Inner Classes ----------------------------------------------------------

/**
Expand Down
23 changes: 10 additions & 13 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -1577,7 +1577,7 @@ public class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> {
private final Map<String, PrunedPartitionList> partitionCache;
private final Map<String, ColumnStatsList> colStatsCache;
private final ColumnAccessInfo columnAccessInfo;
// View RelNode -> underlying Table; handed to HiveRelFieldTrimmer so view column
// accesses can be recorded for authorization.
private final Map<RelNode, Table> relToTable;
private final QB rootQB;
// correlated vars across subqueries within same query needs to have different ID
Expand All @@ -1604,6 +1604,7 @@ protected CalcitePlannerAction(
this.rootQB = rootQB;
this.colStatsCache = ctx.getOpContext().getColStatsCache();
this.columnAccessInfo = columnAccessInfo;
this.relToTable = new HashMap<>();
}

@Override
Expand Down Expand Up @@ -1662,8 +1663,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
// We need to get the ColumnAccessInfo and viewToTableSchema for views.
if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_SCANCOLS) || !skipAuthorization()) {
HiveRelFieldTrimmer.get()
.trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null), calcitePlan, this.columnAccessInfo,
this.viewProjectToTableSchema);
.trim(
HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
calcitePlan,
this.columnAccessInfo,
this.relToTable
);
}
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.MV_REWRITE_FIELD_TRIMMER);

Expand Down Expand Up @@ -4917,15 +4922,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,

aliasToRel.put(subqAlias, relNode);
if (qb.getViewToTabSchema().containsKey(subqAlias)) {
if (relNode instanceof HiveProject) {
if (this.viewProjectToTableSchema == null) {
this.viewProjectToTableSchema = new LinkedHashMap<>();
}
viewProjectToTableSchema.put((HiveProject) relNode, qb.getViewToTabSchema().get(subqAlias));
} else {
throw new SemanticException("View " + subqAlias + " is corresponding to "
+ relNode.toString() + ", rather than a HiveProject.");
}
relToTable.put(relNode, qb.getViewToTabSchema().get(subqAlias));
}
}

Expand Down Expand Up @@ -5046,7 +5043,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
setQB(qb);
return srcRel;
}

private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
RelNode gbFilter = null;
QBParseInfo qbp = getQBParseInfo(qb);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
set hive.security.authorization.enabled=true;
create table t1 (username string, id int);

-- View whose top operator is a HiveAggregate (no HiveProject on top):
-- exercises column-access recording for non-Project view roots.
create view vw_t0 as select distinct username from t1 group by username;
explain cbo select * from vw_t0;

-- View whose top operator is a HiveSortLimit.
create view vw_t1 as select distinct username from t1 order by username desc limit 5;
explain cbo select * from vw_t1;

create view vw_t2 as
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do vw_t2 and vw_t3 have different operators on top of the view? If not, then we are not really adding test coverage, so we should drop those test cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed vw_t3 as it was producing a HiveSortLimit, which is already covered in another test. Kept vw_t2 as it produces a HiveFilter on top.

select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id;
explain cbo select * from vw_t2;
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
PREHOOK: query: create table t1 (username string, id int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t1
POSTHOOK: query: create table t1 (username string, id int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t1
PREHOOK: query: create view vw_t0 as select distinct username from t1 group by username
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@vw_t0
POSTHOOK: query: create view vw_t0 as select distinct username from t1 group by username
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@vw_t0
POSTHOOK: Lineage: vw_t0.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
PREHOOK: query: explain cbo select * from vw_t0
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t0
#### A masked pattern was here ####
POSTHOOK: query: explain cbo select * from vw_t0
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t0
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{0}])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: create view vw_t1 as select distinct username from t1 order by username desc limit 5
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@vw_t1
POSTHOOK: query: create view vw_t1 as select distinct username from t1 order by username desc limit 5
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@vw_t1
POSTHOOK: Lineage: vw_t1.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
PREHOOK: query: explain cbo select * from vw_t1
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t1
#### A masked pattern was here ####
POSTHOOK: query: explain cbo select * from vw_t1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t1
#### A masked pattern was here ####
CBO PLAN:
HiveSortLimit(sort0=[$0], dir0=[DESC], fetch=[5])
HiveProject(username=[$0])
HiveAggregate(group=[{0}])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: create view vw_t2 as
select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@vw_t2
POSTHOOK: query: create view vw_t2 as
select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@vw_t2
POSTHOOK: Lineage: vw_t2.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
PREHOOK: query: explain cbo select * from vw_t2
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo select * from vw_t2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t2
#### A masked pattern was here ####
CBO PLAN:
HiveFilter(condition=[>($0, _UTF-16LE'a')])
HiveProject(username=[$0])
HiveSortLimit(fetch=[1])
HiveProject(username=[$0])
HiveFilter(condition=[>($1, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
Comment on lines +84 to +90
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see the "order by id" reflected in the plan. Is this normal? Does it really matter that it is present in the view definition?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be by design, and can be controlled by:

    HIVE_REMOVE_ORDERBY_IN_SUBQUERY("hive.remove.orderby.in.subquery", true,
        "If set to true, order/sort by without limit in sub queries will be removed.")