Query planner refactor 1 (typedb#5087)
## What is the goal of this PR?
Begin cleaning up the very procedural and overly generic code in the Query Planner to make it more understandable.

## What are the changes implemented in this PR?
* Remove the generic from `DirectedEdge<T>` and all associated classes, including tests. Usage was almost exclusively `DirectedEdge<Node>`. Now none of the classes in the Query Planner accept a generic. It seems the generic originally existed to allow tests to use an Integer as a Node rather than a Node object - for now, tests use `Node` with the Integer encoded as a variable string.
* `Node` had an `addIfAbsent(a, b, allNodes)` method, which did a create-if-absent in the `allNodes` map and returned the Node. This functionality has been moved out of `Node`.
* `NodeId` now uses a static factory method because the instances are not required to be unique
* Move a large chunk of isolated code from `GreedyTraversalPlan.java` into `RelationTypeInference.java` - all to do with inferring types from role players to generate more Label fragments.
* `Fragment`s provide the `Node`s that go into the internal QueryPlanner traversal graph - Fragments that represent Janus edges, for instance, provide a `middle` node that is not a valid starting point. These are now collected into the set of all Nodes at the start of query planning rather than deep in the chain of C-like function calls (see the sketch after this list).
* Move chunks of code into their own methods to deal with later (`chooseStartingNodes`, `buildDependenciesBetweenNodes`)
* _Important_: an experimental change to NOT access all the indexed vertices first thing in the query plan - instead they are visited as normal steps in the flattened minimum spanning tree, as required (the starting point is still almost always an indexed vertex)
* Remove `lowPriorityStartingNodes` as we currently always reify nodes, so implicit relationships are as good a starting point as any
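
The sketch below illustrates the node-collection change described above: each `Fragment` contributes its `Node`s up front, and the planner builds the `NodeId`-to-`Node` map once, instead of calling the removed `Node.addIfAbsent(...)` helper deep inside `directedEdges(...)`. This is a minimal sketch under assumptions, not the actual `GreedyTraversalPlan` code (that diff is not rendered below); the helper name `buildNodeIdMap` and the `Node.getNodeId()` accessor are hypothetical.

```java
import grakn.core.graql.gremlin.fragment.Fragment;
import grakn.core.graql.gremlin.spanningtree.graph.Node;
import grakn.core.graql.gremlin.spanningtree.graph.NodeId;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

class NodeCollectionSketch {

    // Collect every Node that any Fragment contributes, up front.
    // Fragment.getNodes() is introduced in this PR; getNodeId() is an assumed accessor
    // and this helper is hypothetical - it only illustrates the shape of the change.
    static Map<NodeId, Node> buildNodeIdMap(Set<Fragment> allFragments) {
        Map<NodeId, Node> allNodes = new HashMap<>();
        for (Fragment fragment : allFragments) {
            for (Node node : fragment.getNodes()) {
                // create-if-absent now happens once at the planning entry point,
                // rather than inside Node.addIfAbsent(...) during edge construction
                allNodes.putIfAbsent(node.getNodeId(), node);
            }
        }
        return allNodes;
    }
}
```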
flyingsilverfin authored Apr 9, 2019
1 parent 1b5c16e commit 58b6d60
Showing 29 changed files with 799 additions and 597 deletions.
368 changes: 110 additions & 258 deletions server/src/graql/gremlin/GreedyTraversalPlan.java

Large diffs are not rendered by default.

184 changes: 184 additions & 0 deletions server/src/graql/gremlin/RelationTypeInference.java
@@ -0,0 +1,184 @@
/*
* GRAKN.AI - THE KNOWLEDGE GRAPH
* Copyright (C) 2018 Grakn Labs Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

package grakn.core.graql.gremlin;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import grakn.core.concept.Label;
import grakn.core.concept.type.RelationType;
import grakn.core.concept.type.Role;
import grakn.core.concept.type.SchemaConcept;
import grakn.core.concept.type.Type;
import grakn.core.graql.gremlin.fragment.Fragment;
import grakn.core.graql.gremlin.fragment.Fragments;
import grakn.core.graql.gremlin.fragment.InIsaFragment;
import grakn.core.graql.gremlin.fragment.InSubFragment;
import grakn.core.graql.gremlin.fragment.LabelFragment;
import grakn.core.graql.gremlin.fragment.OutRolePlayerFragment;
import grakn.core.graql.gremlin.sets.EquivalentFragmentSet;
import grakn.core.graql.gremlin.sets.EquivalentFragmentSets;
import grakn.core.server.session.TransactionOLTP;
import graql.lang.property.IsaProperty;
import graql.lang.property.TypeProperty;
import graql.lang.statement.Statement;
import graql.lang.statement.Variable;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import static graql.lang.Graql.var;

public class RelationTypeInference {
// infer type of relation type if we know the type of the role players
// add label fragment and isa fragment if we can infer any
public static Set<Fragment> inferRelationTypes(TransactionOLTP tx, Set<Fragment> allFragments) {

Set<Fragment> inferredFragments = new HashSet<>();

Map<Variable, Type> labelVarTypeMap = getLabelVarTypeMap(tx, allFragments);
if (labelVarTypeMap.isEmpty()) return inferredFragments;

Multimap<Variable, Type> instanceVarTypeMap = getInstanceVarTypeMap(allFragments, labelVarTypeMap);

Multimap<Variable, Variable> relationRolePlayerMap = getRelationRolePlayerMap(allFragments, instanceVarTypeMap);
if (relationRolePlayerMap.isEmpty()) return inferredFragments;

// for each type, get all possible relation type it could be in
Multimap<Type, RelationType> relationMap = HashMultimap.create();
labelVarTypeMap.values().stream().distinct().forEach(
type -> addAllPossibleRelations(relationMap, type));

// inferred labels should be kept separately, even if they are already in allFragments set
Map<Label, Statement> inferredLabels = new HashMap<>();
relationRolePlayerMap.asMap().forEach((relationVar, rolePlayerVars) -> {

Set<Type> possibleRelationTypes = rolePlayerVars.stream()
.filter(instanceVarTypeMap::containsKey)
.map(rolePlayer -> getAllPossibleRelationTypes(
instanceVarTypeMap.get(rolePlayer), relationMap))
.reduce(Sets::intersection).orElse(Collections.emptySet());

//TODO: if possibleRelationTypes here is empty, the query will not match any data
if (possibleRelationTypes.size() == 1) {

Type relationType = possibleRelationTypes.iterator().next();
Label label = relationType.label();

// add label fragment if this label has not been inferred
if (!inferredLabels.containsKey(label)) {
Statement labelVar = var();
inferredLabels.put(label, labelVar);
Fragment labelFragment = Fragments.label(new TypeProperty(label.getValue()), labelVar.var(), ImmutableSet.of(label));
inferredFragments.add(labelFragment);
}

// finally, add inferred isa fragments
Statement labelVar = inferredLabels.get(label);
IsaProperty isaProperty = new IsaProperty(labelVar);
EquivalentFragmentSet isaEquivalentFragmentSet = EquivalentFragmentSets.isa(isaProperty,
relationVar, labelVar.var(), relationType.isImplicit());
inferredFragments.addAll(isaEquivalentFragmentSet.fragments());
}
});

return inferredFragments;
}

// find all vars with direct or indirect out isa edges
private static Multimap<Variable, Type> getInstanceVarTypeMap(
Set<Fragment> allFragments, Map<Variable, Type> labelVarTypeMap) {
Multimap<Variable, Type> instanceVarTypeMap = HashMultimap.create();
int oldSize;
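// repeat until the multimap stops growing, i.e. a full pass adds no new instance-variable to type mappings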
do {
oldSize = instanceVarTypeMap.size();
allFragments.stream()
.filter(fragment -> labelVarTypeMap.containsKey(fragment.start())) // restrict to types
.filter(fragment -> fragment instanceof InIsaFragment || fragment instanceof InSubFragment) //
.forEach(fragment -> instanceVarTypeMap.put(fragment.end(), labelVarTypeMap.get(fragment.start())));
} while (oldSize != instanceVarTypeMap.size());
return instanceVarTypeMap;
}

// find all vars representing types
private static Map<Variable, Type> getLabelVarTypeMap(TransactionOLTP tx, Set<Fragment> allFragments) {
Map<Variable, Type> labelVarTypeMap = new HashMap<>();
allFragments.stream()
.filter(LabelFragment.class::isInstance)
.forEach(fragment -> {
// TODO: labels() should return ONE label instead of a set
SchemaConcept schemaConcept = tx.getSchemaConcept(
Iterators.getOnlyElement(((LabelFragment) fragment).labels().iterator()));
if (schemaConcept != null && !schemaConcept.isRole() && !schemaConcept.isRule()) {
labelVarTypeMap.put(fragment.start(), schemaConcept.asType());
}
});
return labelVarTypeMap;
}

private static Multimap<Variable, Variable> getRelationRolePlayerMap(
Set<Fragment> allFragments, Multimap<Variable, Type> instanceVarTypeMap) {
// relation vars and its role player vars
Multimap<Variable, Variable> relationRolePlayerMap = HashMultimap.create();
allFragments.stream().filter(OutRolePlayerFragment.class::isInstance)
.forEach(fragment -> relationRolePlayerMap.put(fragment.start(), fragment.end()));

// find all the relation requiring type inference
Iterator<Variable> iterator = relationRolePlayerMap.keySet().iterator();
while (iterator.hasNext()) {
Variable relation = iterator.next();

// the relation should have at least 2 known role players so we can infer something useful
if (instanceVarTypeMap.containsKey(relation) ||
relationRolePlayerMap.get(relation).size() < 2) {
iterator.remove();
} else {
int numRolePlayersHaveType = 0;
for (Variable rolePlayer : relationRolePlayerMap.get(relation)) {
if (instanceVarTypeMap.containsKey(rolePlayer)) {
numRolePlayersHaveType++;
}
}
if (numRolePlayersHaveType < 2) {
iterator.remove();
}
}
}
return relationRolePlayerMap;
}

private static void addAllPossibleRelations(Multimap<Type, RelationType> relationMap, Type metaType) {
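// for each subtype of the given (meta) type, record every relation type whose roles it can play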
metaType.subs().forEach(type -> type.playing().flatMap(Role::relations)
.forEach(relationType -> relationMap.put(type, relationType)));
}

private static Set<Type> getAllPossibleRelationTypes(
Collection<Type> instanceVarTypes, Multimap<Type, RelationType> relationMap) {
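// given one role player's candidate types, intersect the relation types each of those types can appear in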

return instanceVarTypes.stream()
.map(rolePlayerType -> (Set<Type>) new HashSet<Type>(relationMap.get(rolePlayerType)))
.reduce(Sets::intersection).orElse(Collections.emptySet());
}
}
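
A hedged usage sketch (an assumption about the call site, since the `GreedyTraversalPlan` diff above is not rendered): the planner would merge the inferred fragments into its working set before building the traversal graph, roughly:

// assumed call site inside GreedyTraversalPlan, not shown in this diff
allFragments.addAll(RelationTypeInference.inferRelationTypes(tx, allFragments));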
36 changes: 33 additions & 3 deletions server/src/graql/gremlin/fragment/AbstractRolePlayerFragment.java
@@ -19,6 +19,7 @@
package grakn.core.graql.gremlin.fragment;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import grakn.core.concept.Label;
import grakn.core.concept.type.Role;
import grakn.core.graql.gremlin.spanningtree.graph.DirectedEdge;
@@ -33,11 +34,15 @@
import org.apache.tinkerpop.gremlin.structure.Edge;

import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import static grakn.core.graql.gremlin.fragment.Fragments.displayOptionalTypeLabels;
import static grakn.core.graql.gremlin.spanningtree.util.Weighted.weighted;
import static java.util.stream.Collectors.toSet;

/**
@@ -83,9 +88,34 @@ final ImmutableSet<Variable> otherVars() {
}

@Override
public final Set<Weighted<DirectedEdge<Node>>> directedEdges(
Map<NodeId, Node> nodes, Map<Node, Map<Node, Fragment>> edges) {
return directedEdges(edge(), nodes, edges);
public Set<Node> getNodes() {
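// the role-player edge can be addressed by a variable, so the middle node is keyed by the edge
// variable rather than a synthetic ID; it must still never be chosen as a traversal starting point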
Node start = new Node(NodeId.of(NodeId.NodeType.VAR, start()));
Node end = new Node(NodeId.of(NodeId.NodeType.VAR, end()));
Node middle = new Node(NodeId.of(NodeId.NodeType.VAR, edge()));
middle.setInvalidStartingPoint();
return new HashSet<>(Arrays.asList(start, end, middle));
}

@Override
public final Set<Weighted<DirectedEdge>> directedEdges(Map<NodeId, Node> nodes,
Map<Node, Map<Node, Fragment>> edges) {

// this is a somewhat special case, where the middle node being converted to a vertex
// may be addressed by a variable

Node start = nodes.get(NodeId.of(NodeId.NodeType.VAR, start()));
Node end = nodes.get(NodeId.of(NodeId.NodeType.VAR, end()));
Node middle = nodes.get(NodeId.of(NodeId.NodeType.VAR, edge()));
middle.setInvalidStartingPoint();

if (!edges.containsKey(middle)) {
edges.put(middle, new HashMap<>());
}
edges.get(middle).put(start, this);

return Sets.newHashSet(
weighted(DirectedEdge.from(start).to(middle), -fragmentCost()),
weighted(DirectedEdge.from(middle).to(end), 0));
}

static void applyLabelsToTraversal(
84 changes: 42 additions & 42 deletions server/src/graql/gremlin/fragment/Fragment.java
@@ -144,18 +144,58 @@ public Set<Variable> dependencies() {
return ImmutableSet.of();
}

/**
* When building the query plan spanning tree, every fragment has a start defined with a variable
* Some fragments are actually edges in JanusGraph (such as isa, sub, etc.)
* These require another variable for the end() variable, and to force the MST algorithm to
* traverse these JanusGraph edges too, we insert a fake middle node representing the edge
* @return the set of Nodes this Fragment contributes to the query planner's traversal graph
*/
public Set<Node> getNodes() {
NodeId startNodeId = NodeId.of(NodeId.NodeType.VAR, start());
return Collections.singleton(new Node(startNodeId));
}

/**
* Convert the fragment to a set of weighted edges for query planning
*
* @param nodes all nodes in the query
* @param edges a mapping from edge(child, parent) to its corresponding fragment
* @return a set of edges
*/
public Set<Weighted<DirectedEdge<Node>>> directedEdges(Map<NodeId, Node> nodes,
Map<Node, Map<Node, Fragment>> edges) {
public Set<Weighted<DirectedEdge>> directedEdges(Map<NodeId, Node> nodes,
Map<Node, Map<Node, Fragment>> edges) {
return Collections.emptySet();
}

final Set<Weighted<DirectedEdge>> directedEdges(NodeId.NodeType nodeType,
Map<NodeId, Node> nodes,
Map<Node, Map<Node, Fragment>> edgeToFragment) {

// this call to `directedEdges` handles converting janus edges that the user cannot address
// (ie. not role edges), into edges with a middle node to force the query planner to traverse to this middle
// node that represents the actual Janus edge
// since the middle node cannot be addressed it does not have a variable, so we create a new ID for it
// as the combination of start() and end() with the type

Node start = nodes.get(NodeId.of(NodeId.NodeType.VAR, start()));
Node end = nodes.get(NodeId.of(NodeId.NodeType.VAR, end()));
Node middle = nodes.get(NodeId.of(nodeType, Sets.newHashSet(start(), end())));

addEdgeToFragmentMapping(middle, start, edgeToFragment);
return Sets.newHashSet(
weighted(DirectedEdge.from(start).to(middle), -fragmentCost()),
weighted(DirectedEdge.from(middle).to(end), 0));
}

private void addEdgeToFragmentMapping(Node child, Node parent, Map<Node, Map<Node, Fragment>> edgeToFragment) {
if (!edgeToFragment.containsKey(child)) {
edgeToFragment.put(child, new HashMap<>());
}
edgeToFragment.get(child).put(parent, this);
}


/**
* @param traversal the traversal to extend with this Fragment
* @param tx the graph to execute the traversal on
@@ -251,10 +291,6 @@ public Fragment getInverse() {
return this;
}

public Long getShardCount(TransactionOLTP tx) {
return 0L;
}

/**
* Indicates whether the fragment can be used on an {@link org.apache.tinkerpop.gremlin.structure.Edge} as well as
* a {@link org.apache.tinkerpop.gremlin.structure.Vertex}.
@@ -286,40 +322,4 @@ public final String toString() {
return str;
}

final Set<Weighted<DirectedEdge<Node>>> directedEdges(NodeId.NodeType nodeType,
Map<NodeId, Node> nodes,
Map<Node, Map<Node, Fragment>> edgeToFragment) {

Node start = Node.addIfAbsent(NodeId.NodeType.VAR, start(), nodes);
Node end = Node.addIfAbsent(NodeId.NodeType.VAR, end(), nodes);
Node middle = Node.addIfAbsent(nodeType, Sets.newHashSet(start(), end()), nodes);
middle.setInvalidStartingPoint();

addEdgeToFragmentMapping(middle, start, edgeToFragment);
return Sets.newHashSet(
weighted(DirectedEdge.from(start).to(middle), -fragmentCost()),
weighted(DirectedEdge.from(middle).to(end), 0));
}

final Set<Weighted<DirectedEdge<Node>>> directedEdges(Variable edge,
Map<NodeId, Node> nodes,
Map<Node, Map<Node, Fragment>> edgeToFragment) {

Node start = Node.addIfAbsent(NodeId.NodeType.VAR, start(), nodes);
Node end = Node.addIfAbsent(NodeId.NodeType.VAR, end(), nodes);
Node middle = Node.addIfAbsent(NodeId.NodeType.VAR, edge, nodes);
middle.setInvalidStartingPoint();

addEdgeToFragmentMapping(middle, start, edgeToFragment);
return Sets.newHashSet(
weighted(DirectedEdge.from(start).to(middle), -fragmentCost()),
weighted(DirectedEdge.from(middle).to(end), 0));
}

private void addEdgeToFragmentMapping(Node child, Node parent, Map<Node, Map<Node, Fragment>> edgeToFragment) {
if (!edgeToFragment.containsKey(child)) {
edgeToFragment.put(child, new HashMap<>());
}
edgeToFragment.get(child).put(parent, this);
}
}
15 changes: 14 additions & 1 deletion server/src/graql/gremlin/fragment/InIsaFragment.java
@@ -20,6 +20,7 @@

import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import grakn.core.graql.gremlin.spanningtree.graph.DirectedEdge;
import grakn.core.graql.gremlin.spanningtree.graph.Node;
import grakn.core.graql.gremlin.spanningtree.graph.NodeId;
@@ -33,7 +34,9 @@
import org.apache.tinkerpop.gremlin.structure.Element;
import org.apache.tinkerpop.gremlin.structure.Vertex;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

@@ -135,8 +138,18 @@ public double internalFragmentCost() {
return COST_INSTANCES_PER_TYPE;
}


@Override
public Set<Node> getNodes() {
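// the isa edge itself cannot be addressed by a variable, so the middle node's ID is derived
// from the start and end variables together with the ISA node type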
Node start = new Node(NodeId.of(NodeId.NodeType.VAR, start()));
Node end = new Node(NodeId.of(NodeId.NodeType.VAR, end()));
Node middle = new Node(NodeId.of(NodeId.NodeType.ISA, new HashSet<>(Arrays.asList(start(), end()))));
middle.setInvalidStartingPoint();
return Sets.newHashSet(start, end, middle);
}

@Override
public Set<Weighted<DirectedEdge<Node>>> directedEdges(Map<NodeId, Node> nodes,
public Set<Weighted<DirectedEdge>> directedEdges(Map<NodeId, Node> nodes,
Map<Node, Map<Node, Fragment>> edges) {
return directedEdges(NodeId.NodeType.ISA, nodes, edges);
}
