Set whether to compact the changes sent downstream in row-time mini-batch. If true, Flink will compact changes and send only the latest change downstream. Note that if the downstream needs the details of versioned data, this optimization cannot be applied. If false, Flink will send all changes to downstream just like when the mini-batch is not enabled.
+
+
table.exec.delta-join.cache-enabled
Streaming
+
true
+
Boolean
+
Whether to enable the cache of delta join. If enabled, the delta join caches the records from remote dim table. Default is true.
+
+
+
table.exec.delta-join.left.cache-size
Streaming
+
10000
+
Long
+
The cache size used to cache the lookup results of the left table in delta join. This value must be positive when enabling cache. Default is 10000.
+
+
+
table.exec.delta-join.right.cache-size
Streaming
+
10000
+
Long
+
The cache size used to cache the lookup results of the right table in delta join. This value must be positive when enabling cache. Default is 10000.
+
table.exec.disabled-operators
Batch
(none)
diff --git a/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/config/ExecutionConfigOptions.java b/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/config/ExecutionConfigOptions.java
index 1e0fb43ebfec0..aca9853a38eeb 100644
--- a/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/config/ExecutionConfigOptions.java
+++ b/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/config/ExecutionConfigOptions.java
@@ -725,6 +725,33 @@ public class ExecutionConfigOptions {
"Set whether to use the SQL/Table operators based on the asynchronous state api. "
+ "Default value is false.");
+ @Documentation.TableOption(execMode = Documentation.ExecMode.STREAMING)
+ public static final ConfigOption<Boolean> TABLE_EXEC_DELTA_JOIN_CACHE_ENABLED =
+ key("table.exec.delta-join.cache-enabled")
+ .booleanType()
+ .defaultValue(true)
+ .withDescription(
+ "Whether to enable the cache of delta join. If enabled, the delta join caches the "
+ + "records from remote dim table. Default is true.");
+
+ @Documentation.TableOption(execMode = Documentation.ExecMode.STREAMING)
+ public static final ConfigOption<Long> TABLE_EXEC_DELTA_JOIN_LEFT_CACHE_SIZE =
+ key("table.exec.delta-join.left.cache-size")
+ .longType()
+ .defaultValue(10000L)
+ .withDescription(
+ "The cache size used to cache the lookup results of the left table in delta join. "
+ + "This value must be positive when enabling cache. Default is 10000.");
+
+ @Documentation.TableOption(execMode = Documentation.ExecMode.STREAMING)
+ public static final ConfigOption<Long> TABLE_EXEC_DELTA_JOIN_RIGHT_CACHE_SIZE =
+ key("table.exec.delta-join.right.cache-size")
+ .longType()
+ .defaultValue(10000L)
+ .withDescription(
+ "The cache size used to cache the lookup results of the right table in delta join. "
+ + "This value must be positive when enabling cache. Default is 10000.");
+
// ------------------------------------------------------------------------------------------
// Enum option types
// ------------------------------------------------------------------------------------------
diff --git a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/nodes/exec/stream/StreamExecDeltaJoin.java b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/nodes/exec/stream/StreamExecDeltaJoin.java
index 6d368618c9130..962cc5505414d 100644
--- a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/nodes/exec/stream/StreamExecDeltaJoin.java
+++ b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/nodes/exec/stream/StreamExecDeltaJoin.java
@@ -20,10 +20,12 @@
import org.apache.flink.FlinkVersion;
import org.apache.flink.api.dag.Transformation;
+import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.streaming.api.functions.async.AsyncFunction;
import org.apache.flink.streaming.api.operators.StreamOperatorFactory;
import org.apache.flink.streaming.api.transformations.TwoInputTransformation;
+import org.apache.flink.table.api.config.ExecutionConfigOptions;
import org.apache.flink.table.catalog.DataTypeFactory;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.conversion.DataStructureConverter;
@@ -83,6 +85,7 @@
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
+import java.util.stream.IntStream;
import static org.apache.flink.table.planner.plan.nodes.exec.stream.StreamExecDeltaJoin.DELTA_JOIN_TRANSFORMATION;
import static org.apache.flink.table.planner.plan.utils.DeltaJoinUtil.getUnwrappedAsyncLookupFunction;
@@ -234,11 +237,17 @@ protected Transformation translateToPlanInternal(
RowDataKeySelector leftJoinKeySelector =
KeySelectorUtil.getRowDataSelector(
classLoader, leftJoinKeys, InternalTypeInfo.of(leftStreamType));
+ // currently, delta join only supports consuming INSERT-ONLY stream
+ RowDataKeySelector leftUpsertKeySelector =
+ getUpsertKeySelector(new int[0], leftStreamType, classLoader);
// right side selector
RowDataKeySelector rightJoinKeySelector =
KeySelectorUtil.getRowDataSelector(
classLoader, rightJoinKeys, InternalTypeInfo.of(rightStreamType));
+ // currently, delta join only supports consuming INSERT-ONLY stream
+ RowDataKeySelector rightUpsertKeySelector =
+ getUpsertKeySelector(new int[0], rightStreamType, classLoader);
StreamOperatorFactory<RowData> operatorFactory =
createAsyncLookupDeltaJoin(
@@ -252,7 +261,9 @@ protected Transformation translateToPlanInternal(
leftStreamType,
rightStreamType,
leftJoinKeySelector,
+ leftUpsertKeySelector,
rightJoinKeySelector,
+ rightUpsertKeySelector,
classLoader);
final TwoInputTransformation transform =
@@ -282,7 +293,9 @@ private StreamOperatorFactory createAsyncLookupDeltaJoin(
RowType leftStreamType,
RowType rightStreamType,
RowDataKeySelector leftJoinKeySelector,
+ RowDataKeySelector leftUpsertKeySelector,
RowDataKeySelector rightJoinKeySelector,
+ RowDataKeySelector rightUpsertKeySelector,
ClassLoader classLoader) {
DataTypeFactory dataTypeFactory =
@@ -299,6 +312,10 @@ private StreamOperatorFactory createAsyncLookupDeltaJoin(
leftStreamType,
rightStreamType,
leftLookupKeys,
+ leftJoinKeySelector,
+ leftUpsertKeySelector,
+ rightJoinKeySelector,
+ rightUpsertKeySelector,
false);
AsyncDeltaJoinRunner rightLookupTableAsyncFunction =
@@ -312,8 +329,14 @@ private StreamOperatorFactory createAsyncLookupDeltaJoin(
leftStreamType,
rightStreamType,
rightLookupKeys,
+ leftJoinKeySelector,
+ leftUpsertKeySelector,
+ rightJoinKeySelector,
+ rightUpsertKeySelector,
true);
+ Tuple2<Long, Long> leftRightCacheSize = getCacheSize(config);
+
return new StreamingDeltaJoinOperatorFactory(
rightLookupTableAsyncFunction,
leftLookupTableAsyncFunction,
@@ -321,6 +344,8 @@ private StreamOperatorFactory createAsyncLookupDeltaJoin(
rightJoinKeySelector,
asyncLookupOptions.asyncTimeout,
asyncLookupOptions.asyncBufferCapacity,
+ leftRightCacheSize.f0,
+ leftRightCacheSize.f1,
leftStreamType,
rightStreamType);
}
@@ -336,6 +361,10 @@ private AsyncDeltaJoinRunner createAsyncDeltaJoinRunner(
RowType leftStreamSideType,
RowType rightStreamSideType,
Map lookupKeys,
+ RowDataKeySelector leftJoinKeySelector,
+ RowDataKeySelector leftUpsertKeySelector,
+ RowDataKeySelector rightJoinKeySelector,
+ RowDataKeySelector rightUpsertKeySelector,
boolean treatRightAsLookupTable) {
RelOptTable lookupTable = treatRightAsLookupTable ? rightTempTable : leftTempTable;
RowType streamSideType = treatRightAsLookupTable ? leftStreamSideType : rightStreamSideType;
@@ -409,8 +438,13 @@ private AsyncDeltaJoinRunner createAsyncDeltaJoinRunner(
(DataStructureConverter) lookupSideFetcherConverter,
lookupSideGeneratedResultFuture,
InternalSerializers.create(lookupTableSourceRowType),
+ leftJoinKeySelector,
+ leftUpsertKeySelector,
+ rightJoinKeySelector,
+ rightUpsertKeySelector,
asyncLookupOptions.asyncBufferCapacity,
- treatRightAsLookupTable);
+ treatRightAsLookupTable,
+ enableCache(config));
}
/**
@@ -449,4 +483,33 @@ public RexNode visitInputRef(RexInputRef inputRef) {
return condition.accept(converter);
}
+
+ private RowDataKeySelector getUpsertKeySelector(
+ int[] upsertKey, RowType rowType, ClassLoader classLoader) {
+ final int[] rightUpsertKeys;
+ if (upsertKey.length > 0) {
+ rightUpsertKeys = upsertKey;
+ } else {
+ rightUpsertKeys = IntStream.range(0, rowType.getFields().size()).toArray();
+ }
+ return KeySelectorUtil.getRowDataSelector(
+ classLoader, rightUpsertKeys, InternalTypeInfo.of(rowType));
+ }
+
+ private boolean enableCache(ReadableConfig config) {
+ return config.get(ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_CACHE_ENABLED);
+ }
+
+ /** Get the left and right cache sizes for the delta join. */
+ private Tuple2<Long, Long> getCacheSize(ReadableConfig config) {
+ long leftCacheSize =
+ config.get(ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_LEFT_CACHE_SIZE);
+ long rightCacheSize =
+ config.get(ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_RIGHT_CACHE_SIZE);
+ if ((leftCacheSize <= 0 || rightCacheSize <= 0) && enableCache(config)) {
+ throw new IllegalArgumentException(
+ "Cache size in delta join must be positive when enabling cache.");
+ }
+ return Tuple2.of(leftCacheSize, rightCacheSize);
+ }
}
diff --git a/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/DeltaJoinITCase.scala b/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/DeltaJoinITCase.scala
index e4b15abb463cf..063fe973c67e9 100644
--- a/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/DeltaJoinITCase.scala
+++ b/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/DeltaJoinITCase.scala
@@ -20,17 +20,19 @@ package org.apache.flink.table.planner.runtime.stream.sql
import org.apache.flink.core.execution.CheckpointingMode
import org.apache.flink.table.api.Schema
import org.apache.flink.table.api.bridge.scala.internal.StreamTableEnvironmentImpl
-import org.apache.flink.table.api.config.OptimizerConfigOptions
+import org.apache.flink.table.api.config.{ExecutionConfigOptions, OptimizerConfigOptions}
import org.apache.flink.table.api.config.OptimizerConfigOptions.DeltaJoinStrategy
import org.apache.flink.table.catalog.{CatalogTable, ObjectPath, ResolvedCatalogTable}
import org.apache.flink.table.planner.factories.TestValuesRuntimeFunctions.AsyncTestValueLookupFunction
import org.apache.flink.table.planner.factories.TestValuesTableFactory
import org.apache.flink.table.planner.factories.TestValuesTableFactory.changelogRow
import org.apache.flink.table.planner.runtime.utils.{FailingCollectionSource, StreamingTestBase}
+import org.apache.flink.testutils.junit.extensions.parameterized.{ParameterizedTestExtension, Parameters}
import org.apache.flink.types.Row
import org.assertj.core.api.Assertions.{assertThat, assertThatThrownBy}
-import org.junit.jupiter.api.{BeforeEach, Test}
+import org.junit.jupiter.api.{BeforeEach, TestTemplate}
+import org.junit.jupiter.api.extension.ExtendWith
import javax.annotation.Nullable
@@ -39,7 +41,8 @@ import java.util.concurrent.TimeUnit
import scala.collection.JavaConversions._
-class DeltaJoinITCase extends StreamingTestBase {
+@ExtendWith(Array(classOf[ParameterizedTestExtension]))
+class DeltaJoinITCase(enableCache: Boolean) extends StreamingTestBase {
@BeforeEach
override def before(): Unit = {
@@ -49,10 +52,14 @@ class DeltaJoinITCase extends StreamingTestBase {
OptimizerConfigOptions.TABLE_OPTIMIZER_DELTA_JOIN_STRATEGY,
DeltaJoinStrategy.FORCE)
+ tEnv.getConfig.set(
+ ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_CACHE_ENABLED,
+ Boolean.box(enableCache))
+
AsyncTestValueLookupFunction.invokeCount.set(0)
}
- @Test
+ @TestTemplate
def testJoinKeyEqualsIndex(): Unit = {
val data1 = List(
changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
@@ -77,7 +84,7 @@ class DeltaJoinITCase extends StreamingTestBase {
testUpsertResult(List("a1"), List("b1"), data1, data2, "a1 = b1", expected, 6)
}
- @Test
+ @TestTemplate
def testJoinKeyContainsIndex(): Unit = {
val data1 = List(
changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
@@ -102,7 +109,7 @@ class DeltaJoinITCase extends StreamingTestBase {
testUpsertResult(List("a1"), List("b1"), data1, data2, "a1 = b1 and a2 = b2", expected, 6)
}
- @Test
+ @TestTemplate
def testJoinKeyNotContainsIndex(): Unit = {
val data1 = List(
changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
@@ -129,7 +136,72 @@ class DeltaJoinITCase extends StreamingTestBase {
.hasMessageContaining("The current sql doesn't support to do delta join optimization.")
}
- @Test
+ @TestTemplate
+ def testSameJoinKeyColValuesWhileJoinKeyEqualsIndex(): Unit = {
+ val data1 = List(
+ changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
+ changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2022, 2, 2, 2, 2, 2)),
+ // mismatch
+ changelogRow("+I", Double.box(3.0), Int.box(3), LocalDateTime.of(2023, 3, 3, 3, 3, 3))
+ )
+
+ val data2 = List(
+ changelogRow("+I", Int.box(1), Double.box(1.0), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
+ changelogRow("+I", Int.box(1), Double.box(1.0), LocalDateTime.of(2022, 2, 2, 2, 2, 22)),
+ // mismatch
+ changelogRow("+I", Int.box(99), Double.box(99.0), LocalDateTime.of(2099, 2, 2, 2, 2, 2))
+ )
+
+ // TestValuesRuntimeFunctions#KeyedUpsertingSinkFunction will change the RowKind from
+ // "+U" to "+I"
+ val expected = List(
+ "+I[1.0, 1, 2022-02-02T02:02:02, 1, 1.0, 2022-02-02T02:02:22]"
+ )
+ testUpsertResult(
+ List("a1"),
+ List("b1"),
+ data1,
+ data2,
+ "a1 = b1",
+ expected,
+ if (enableCache) 4 else 6)
+ }
+
+ @TestTemplate
+ def testSameJoinKeyColValuesWhileJoinKeyContainsIndex(): Unit = {
+ val data1 = List(
+ changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
+ changelogRow("+I", Double.box(1.0), Int.box(2), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
+ // mismatch
+ changelogRow("+I", Double.box(3.0), Int.box(3), LocalDateTime.of(2023, 3, 3, 3, 3, 3))
+ )
+
+ val data2 = List(
+ changelogRow("+I", Int.box(1), Double.box(1.0), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
+ changelogRow("+I", Int.box(2), Double.box(1.0), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
+ // mismatch
+ changelogRow("+I", Int.box(99), Double.box(99.0), LocalDateTime.of(2099, 2, 2, 2, 2, 2))
+ )
+
+ // TestValuesRuntimeFunctions#KeyedUpsertingSinkFunction will change the RowKind from
+ // "+U" to "+I"
+ val expected = List(
+ "+I[1.0, 1, 2021-01-01T01:01:01, 1, 1.0, 2021-01-01T01:01:01]",
+ "+I[1.0, 1, 2021-01-01T01:01:01, 2, 1.0, 2021-01-01T01:01:01]",
+ "+I[1.0, 2, 2021-01-01T01:01:01, 1, 1.0, 2021-01-01T01:01:01]",
+ "+I[1.0, 2, 2021-01-01T01:01:01, 2, 1.0, 2021-01-01T01:01:01]"
+ )
+ testUpsertResult(
+ List("a1"),
+ List("b1"),
+ data1,
+ data2,
+ "a1 = b1 and a2 = b2",
+ expected,
+ if (enableCache) 4 else 6)
+ }
+
+ @TestTemplate
def testWithNonEquiCondition1(): Unit = {
val data1 = List(
changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
@@ -159,7 +231,7 @@ class DeltaJoinITCase extends StreamingTestBase {
6)
}
- @Test
+ @TestTemplate
def testWithNonEquiCondition2(): Unit = {
val data1 = List(
changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
@@ -206,7 +278,7 @@ class DeltaJoinITCase extends StreamingTestBase {
.hasMessageContaining("The current sql doesn't support to do delta join optimization.")
}
- @Test
+ @TestTemplate
def testFilterFieldsBeforeJoin(): Unit = {
val data1 = List(
changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
@@ -245,7 +317,7 @@ class DeltaJoinITCase extends StreamingTestBase {
.hasMessageContaining("The current sql doesn't support to do delta join optimization.")
}
- @Test
+ @TestTemplate
def testProjectFieldsAfterJoin(): Unit = {
val data1 = List(
changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
@@ -271,8 +343,19 @@ class DeltaJoinITCase extends StreamingTestBase {
)
tEnv
- .executeSql(
- s"insert into testSnk select a1, a0 + 1, a2, b0 + 2, b1, b2 from testLeft join testRight on a0 = b0")
+ .executeSql("""
+ |insert into testSnk
+ | select
+ | a1,
+ | a0 + 1,
+ | a2,
+ | b0 + 2,
+ | b1,
+ | b2
+ | from testLeft
+ | join testRight
+ | on a0 = b0
+ |""".stripMargin)
.await()
val result = TestValuesTableFactory.getResultsAsStrings("testSnk")
@@ -280,7 +363,7 @@ class DeltaJoinITCase extends StreamingTestBase {
assertThat(AsyncTestValueLookupFunction.invokeCount.get()).isEqualTo(6)
}
- @Test
+ @TestTemplate
def testProjectFieldsBeforeJoin(): Unit = {
val data1 = List(
changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
@@ -311,18 +394,23 @@ class DeltaJoinITCase extends StreamingTestBase {
|)
|""".stripMargin)
- // could not optimize into delta join because there is ProjectPushDownSpec between join and source
- assertThatThrownBy(
- () =>
- tEnv
- .executeSql(
- s"insert into projectedSink select testLeft.a0, testRight.b0, testLeft.a1, testLeft.a2 " +
- s"from testLeft join testRight on a0 = b0")
- .await())
+ // could not optimize into delta join
+ // because there is ProjectPushDownSpec between join and source
+ assertThatThrownBy(() => tEnv.executeSql("""
+ |insert into projectedSink
+ | select
+ | testLeft.a0,
+ | testRight.b0,
+ | testLeft.a1,
+ | testLeft.a2
+ | from testLeft
+ | join testRight
+ | on a0 = b0
+ |""".stripMargin))
.hasMessageContaining("The current sql doesn't support to do delta join optimization.")
}
- @Test
+ @TestTemplate
def testProjectFieldsBeforeJoin2(): Unit = {
val data1 = List(
changelogRow("+I", Double.box(1.0), Int.box(1), LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
@@ -354,7 +442,7 @@ class DeltaJoinITCase extends StreamingTestBase {
.hasMessageContaining("The current sql doesn't support to do delta join optimization.")
}
- @Test
+ @TestTemplate
def testFailOverAndRestore(): Unit = {
// enable checkpoint, we are using failing source to force have a complete checkpoint
// and cover restore path
@@ -504,3 +592,10 @@ class DeltaJoinITCase extends StreamingTestBase {
}
}
+
+object DeltaJoinITCase {
+ @Parameters(name = "EnableCache={0}")
+ def parameters(): java.util.Collection[Boolean] = {
+ Seq[Boolean](true, false)
+ }
+}
diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/StreamingDeltaJoinOperatorFactory.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/StreamingDeltaJoinOperatorFactory.java
index c3d2c97daf5a7..9dabd3176780f 100644
--- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/StreamingDeltaJoinOperatorFactory.java
+++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/StreamingDeltaJoinOperatorFactory.java
@@ -36,13 +36,17 @@ public class StreamingDeltaJoinOperatorFactory extends AbstractStreamOperatorFac
YieldingOperatorFactory {
private final AsyncDeltaJoinRunner rightLookupTableAsyncFunction;
- private final RowDataKeySelector rightJoinKeySelector;
private final AsyncDeltaJoinRunner leftLookupTableAsyncFunction;
+
private final RowDataKeySelector leftJoinKeySelector;
+ private final RowDataKeySelector rightJoinKeySelector;
private final long timeout;
private final int capacity;
+ private final long leftSideCacheSize;
+ private final long rightSideCacheSize;
+
private final RowType leftStreamType;
private final RowType rightStreamType;
@@ -53,6 +57,8 @@ public StreamingDeltaJoinOperatorFactory(
RowDataKeySelector rightJoinKeySelector,
long timeout,
int capacity,
+ long leftSideCacheSize,
+ long rightSideCacheSize,
RowType leftStreamType,
RowType rightStreamType) {
this.rightLookupTableAsyncFunction = rightLookupTableAsyncFunction;
@@ -61,6 +67,8 @@ public StreamingDeltaJoinOperatorFactory(
this.rightJoinKeySelector = rightJoinKeySelector;
this.timeout = timeout;
this.capacity = capacity;
+ this.leftSideCacheSize = leftSideCacheSize;
+ this.rightSideCacheSize = rightSideCacheSize;
this.leftStreamType = leftStreamType;
this.rightStreamType = rightStreamType;
}
@@ -79,6 +87,8 @@ public > T createStreamOperator(
capacity,
processingTimeService,
mailboxExecutor,
+ leftSideCacheSize,
+ rightSideCacheSize,
leftStreamType,
rightStreamType);
deltaJoinOperator.setup(
diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/join/deltajoin/AsyncDeltaJoinRunner.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/join/deltajoin/AsyncDeltaJoinRunner.java
index 6882314bc75ac..d73bfd315db35 100644
--- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/join/deltajoin/AsyncDeltaJoinRunner.java
+++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/join/deltajoin/AsyncDeltaJoinRunner.java
@@ -31,15 +31,21 @@
import org.apache.flink.table.runtime.collector.TableFunctionResultFuture;
import org.apache.flink.table.runtime.generated.GeneratedFunction;
import org.apache.flink.table.runtime.generated.GeneratedResultFuture;
+import org.apache.flink.table.runtime.keyselector.RowDataKeySelector;
import org.apache.flink.table.runtime.typeutils.RowDataSerializer;
+import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import javax.annotation.Nullable;
+
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Optional;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
@@ -64,6 +70,22 @@ public class AsyncDeltaJoinRunner extends RichAsyncFunction {
private final boolean treatRightAsLookupTable;
+ private final boolean enableCache;
+
+ /** Selector to get join key from left input. */
+ private final RowDataKeySelector leftJoinKeySelector;
+
+ /** Selector to get upsert key from left input. */
+ private final RowDataKeySelector leftUpsertKeySelector;
+
+ /** Selector to get join key from right input. */
+ private final RowDataKeySelector rightJoinKeySelector;
+
+ /** Selector to get upsert key from right input. */
+ private final RowDataKeySelector rightUpsertKeySelector;
+
+ private transient DeltaJoinCache cache;
+
/**
* Buffers {@link ResultFuture} to avoid newInstance cost when processing elements every time.
* We use {@link BlockingQueue} to make sure the head {@link ResultFuture}s are available.
@@ -85,14 +107,28 @@ public AsyncDeltaJoinRunner(
DataStructureConverter<RowData, Object> fetcherConverter,
GeneratedResultFuture<TableFunctionResultFuture<RowData>> generatedResultFuture,
RowDataSerializer lookupSideRowSerializer,
+ RowDataKeySelector leftJoinKeySelector,
+ RowDataKeySelector leftUpsertKeySelector,
+ RowDataKeySelector rightJoinKeySelector,
+ RowDataKeySelector rightUpsertKeySelector,
int asyncBufferCapacity,
- boolean treatRightAsLookupTable) {
+ boolean treatRightAsLookupTable,
+ boolean enableCache) {
this.generatedFetcher = generatedFetcher;
this.fetcherConverter = fetcherConverter;
this.generatedResultFuture = generatedResultFuture;
this.lookupSideRowSerializer = lookupSideRowSerializer;
+ this.leftJoinKeySelector = leftJoinKeySelector;
+ this.leftUpsertKeySelector = leftUpsertKeySelector;
+ this.rightJoinKeySelector = rightJoinKeySelector;
+ this.rightUpsertKeySelector = rightUpsertKeySelector;
this.asyncBufferCapacity = asyncBufferCapacity;
this.treatRightAsLookupTable = treatRightAsLookupTable;
+ this.enableCache = enableCache;
+ }
+
+ public void setCache(DeltaJoinCache cache) {
+ this.cache = cache;
}
@Override
@@ -121,7 +157,11 @@ public void open(OpenContext openContext) throws Exception {
resultFutureBuffer,
createFetcherResultFuture(openContext),
fetcherConverter,
- treatRightAsLookupTable);
+ treatRightAsLookupTable,
+ leftUpsertKeySelector,
+ rightUpsertKeySelector,
+ enableCache,
+ cache);
// add will throw exception immediately if the queue is full which should never happen
resultFutureBuffer.add(rf);
allResultFutures.add(rf);
@@ -141,8 +181,27 @@ public void open(OpenContext openContext) throws Exception {
public void asyncInvoke(RowData input, ResultFuture<RowData> resultFuture) throws Exception {
JoinedRowResultFuture outResultFuture = resultFutureBuffer.take();
+ RowData streamJoinKey = null;
+ if (enableCache) {
+ if (treatRightAsLookupTable) {
+ streamJoinKey = leftJoinKeySelector.getKey(input);
+ cache.requestRightCache();
+ } else {
+ streamJoinKey = rightJoinKeySelector.getKey(input);
+ cache.requestLeftCache();
+ }
+ }
+
// the input row is copied when object reuse in StreamDeltaJoinOperator
- outResultFuture.reset(input, resultFuture);
+ outResultFuture.reset(streamJoinKey, input, resultFuture);
+
+ if (enableCache) {
+ Optional<Collection<RowData>> dataFromCache = tryGetDataFromCache(streamJoinKey);
+ if (dataFromCache.isPresent()) {
+ outResultFuture.complete(dataFromCache.get());
+ return;
+ }
+ }
long startTime = System.currentTimeMillis();
// fetcher has copied the input field when object reuse is enabled
@@ -177,6 +236,30 @@ public AsyncFunction getFetcher() {
return fetcher;
}
+ @VisibleForTesting
+ public DeltaJoinCache getCache() {
+ return cache;
+ }
+
+ private Optional<Collection<RowData>> tryGetDataFromCache(RowData joinKey) {
+ Preconditions.checkState(enableCache);
+
+ if (treatRightAsLookupTable) {
+ LinkedHashMap<RowData, RowData> rightRows = cache.getData(joinKey, true);
+ if (rightRows != null) {
+ cache.hitRightCache();
+ return Optional.of(rightRows.values());
+ }
+ } else {
+ LinkedHashMap<RowData, RowData> leftRows = cache.getData(joinKey, false);
+ if (leftRows != null) {
+ cache.hitLeftCache();
+ return Optional.of(leftRows.values());
+ }
+ }
+ return Optional.empty();
+ }
+
/**
* The {@link JoinedRowResultFuture} is used to combine left {@link RowData} and right {@link
* RowData} into {@link JoinedRowData}.
@@ -191,9 +274,16 @@ public static final class JoinedRowResultFuture implements ResultFuture