diff --git a/.scannerwork/.sonar_lock b/.scannerwork/.sonar_lock new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/.scannerwork/report-task.txt b/.scannerwork/report-task.txt new file mode 100644 index 0000000000000..fe8075ff627b3 --- /dev/null +++ b/.scannerwork/report-task.txt @@ -0,0 +1,6 @@ +projectKey=kafka +serverUrl=http://35.90.107.202:9000 +serverVersion=9.5.0.56709 +dashboardUrl=http://35.90.107.202:9000/dashboard?id=kafka +ceTaskId=AYLXVHbzGGiZuPHBoHgz +ceTaskUrl=http://35.90.107.202:9000/api/ce/task?id=AYLXVHbzGGiZuPHBoHgz diff --git a/Jenkinsfile b/Jenkinsfile index e59183cbaa6d8..62e58cf71c4d0 100755 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -28,6 +28,12 @@ def config = jobConfig { downStreamRepos = ["common",] nanoVersion = true disableConcurrentBuilds = true + sonarqubeScannerEnable=true + sonarqubeXmlTestReportPath="**/build/test-results/**/TEST-*.xml" + sonarqubeXmlCoveragePath="coverage.xml" + sonarqubeCodeLanguage="java" + sonarqubeCompiledClassesPath="./build/libs/" + sonarqubeExclusions="**/*test*/**/*" } def retryFlagsString(jobConfig) { @@ -135,11 +141,11 @@ def job = { } } ] - + sh "ls build" + sh "./gradlew jar" result = parallel testTargets // combine results of the two targets into one result string return result.runTestsStepName + "\n" + result.downstreamBuildsStepName } - runJob config, job echo downstreamBuildFailureOutput diff --git a/connect/mirror/bin/test/log4j.properties b/connect/mirror/bin/test/log4j.properties new file mode 100644 index 0000000000000..a2ac021dfab98 --- /dev/null +++ b/connect/mirror/bin/test/log4j.properties @@ -0,0 +1,34 @@ +## +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +log4j.rootLogger=ERROR, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +# +# The `%X{connector.context}` parameter in the layout includes connector-specific and task-specific information +# in the log message, where appropriate. This makes it easier to identify those log messages that apply to a +# specific connector. Simply add this parameter to the log layout configuration below to include the contextual information. 
+# +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %X{connector.context}%m (%c:%L)%n +# +# The following line includes no MDC context parameters: +#log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n (%t) + +log4j.logger.org.reflections=OFF +log4j.logger.kafka=OFF +log4j.logger.state.change.logger=OFF +log4j.logger.org.apache.kafka.connect.mirror=INFO diff --git a/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy new file mode 100644 index 0000000000000..8b76ce452b659 --- /dev/null +++ b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy @@ -0,0 +1,18 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.connector.policy.AllConnectorClientConfigOverridePolicy +org.apache.kafka.connect.connector.policy.PrincipalConnectorClientConfigOverridePolicy +org.apache.kafka.connect.connector.policy.NoneConnectorClientConfigOverridePolicy \ No newline at end of file diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension new file mode 100644 index 0000000000000..0a1ef88924e1e --- /dev/null +++ b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
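A side note on the provider-configuration files added above (for example the `ConnectorClientConfigOverridePolicy` listing): these are standard `java.util.ServiceLoader` files, where the file name is the fully qualified interface and each non-comment line names an implementation class. A minimal sketch of the discovery mechanism they feed; the printing loop is purely illustrative:

```scala
import java.util.ServiceLoader
import scala.jdk.CollectionConverters._

import org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy

object ServiceLoaderSketch {
  def main(args: Array[String]): Unit = {
    // ServiceLoader scans META-INF/services/<interface FQCN> on the classpath and
    // instantiates every implementation class listed there.
    val policies = ServiceLoader.load(classOf[ConnectorClientConfigOverridePolicy]).asScala.toList
    policies.foreach(p => println(p.getClass.getName))
  }
}
```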
+ +org.apache.kafka.connect.runtime.isolation.PluginsTest$TestConnectRestExtension \ No newline at end of file diff --git a/connect/runtime/bin/test/log4j.properties b/connect/runtime/bin/test/log4j.properties new file mode 100644 index 0000000000000..176692deb7b2b --- /dev/null +++ b/connect/runtime/bin/test/log4j.properties @@ -0,0 +1,35 @@ +## +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +log4j.rootLogger=INFO, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +# +# The `%X{connector.context}` parameter in the layout includes connector-specific and task-specific information +# in the log message, where appropriate. This makes it easier to identify those log messages that apply to a +# specific connector. Simply add this parameter to the log layout configuration below to include the contextual information. +# +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %X{connector.context}%m (%c:%L)%n +# +# The following line includes no MDC context parameters: +#log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n (%t) + +log4j.logger.org.reflections=ERROR +log4j.logger.kafka=WARN +log4j.logger.org.apache.kafka.connect=DEBUG +log4j.logger.org.apache.kafka.connect.runtime.distributed=DEBUG +log4j.logger.org.apache.kafka.connect.integration=DEBUG diff --git a/connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider b/connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider new file mode 100644 index 0000000000000..62d8df254bbc3 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
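The `%X{connector.context}` token in the log4j layouts above is resolved from the SLF4J/log4j mapped diagnostic context (MDC), which the Connect runtime populates through its own logging context. A minimal sketch of how that key gets a value; the connector name and task id below are invented for illustration:

```scala
import org.slf4j.{LoggerFactory, MDC}

object ConnectorContextMdcSketch {
  private val log = LoggerFactory.getLogger(getClass)

  def main(args: Array[String]): Unit = {
    // Whatever is stored under "connector.context" is what %X{connector.context} expands to.
    MDC.put("connector.context", "[my-connector|task-0] ")
    try log.info("polling source") // logged as: [<date>] INFO [my-connector|task-0] polling source (...)
    finally MDC.remove("connector.context")
  }
}
```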
+ +test.plugins.SamplingConfigProvider diff --git a/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass b/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass new file mode 100644 index 0000000000000..b8db8656487d2 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.ServiceLoadedSubclass \ No newline at end of file diff --git a/storage/bin/main/message/RemoteLogSegmentMetadataRecord.json b/storage/bin/main/message/RemoteLogSegmentMetadataRecord.json new file mode 100644 index 0000000000000..d18144e4dfe12 --- /dev/null +++ b/storage/bin/main/message/RemoteLogSegmentMetadataRecord.json @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 0, + "type": "metadata", + "name": "RemoteLogSegmentMetadataRecord", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { + "name": "RemoteLogSegmentId", + "type": "RemoteLogSegmentIdEntry", + "versions": "0+", + "about": "Unique representation of the remote log segment.", + "fields": [ + { + "name": "TopicIdPartition", + "type": "TopicIdPartitionEntry", + "versions": "0+", + "about": "Represents unique topic partition.", + "fields": [ + { + "name": "Name", + "type": "string", + "versions": "0+", + "about": "Topic name." + }, + { + "name": "Id", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the topic." + }, + { + "name": "Partition", + "type": "int32", + "versions": "0+", + "about": "Partition number." + } + ] + }, + { + "name": "Id", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the remote log segment." + } + ] + }, + { + "name": "StartOffset", + "type": "int64", + "versions": "0+", + "about": "Start offset of the segment." + }, + { + "name": "EndOffset", + "type": "int64", + "versions": "0+", + "about": "End offset of the segment." 
+ }, + { + "name": "BrokerId", + "type": "int32", + "versions": "0+", + "about": "Broker id from which this event is generated." + }, + { + "name": "MaxTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Maximum timestamp in milli seconds with in this segment." + }, + { + "name": "EventTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Epoch time in milli seconds at which this event is generated." + }, + { + "name": "SegmentLeaderEpochs", + "type": "[]SegmentLeaderEpochEntry", + "versions": "0+", + "about": "Leader epoch to start-offset mappings for the records with in this segment.", + "fields": [ + { + "name": "LeaderEpoch", + "type": "int32", + "versions": "0+", + "about": "Leader epoch" + }, + { + "name": "Offset", + "type": "int64", + "versions": "0+", + "about": "Start offset for the leader epoch." + } + ] + }, + { + "name": "SegmentSizeInBytes", + "type": "int32", + "versions": "0+", + "about": "Segment size in bytes." + }, + { + "name": "RemoteLogSegmentState", + "type": "int8", + "versions": "0+", + "about": "State identifier of the remote log segment, which is RemoteLogSegmentState.id()." + } + ] +} \ No newline at end of file diff --git a/storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json b/storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json new file mode 100644 index 0000000000000..dbb29139c19c1 --- /dev/null +++ b/storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 3, + "type": "metadata", + "name": "RemoteLogSegmentMetadataSnapshotRecord", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { + "name": "SegmentId", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the log segment" + }, + { + "name": "StartOffset", + "type": "int64", + "versions": "0+", + "about": "Start offset of the segment." + }, + { + "name": "EndOffset", + "type": "int64", + "versions": "0+", + "about": "End offset of the segment." + }, + { + "name": "BrokerId", + "type": "int32", + "versions": "0+", + "about": "Broker (controller or leader) id from which this event is created or updated." + }, + { + "name": "MaxTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Maximum timestamp with in this segment." + }, + { + "name": "EventTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Event timestamp of this segment." 
+ }, + { + "name": "SegmentLeaderEpochs", + "type": "[]SegmentLeaderEpochEntry", + "versions": "0+", + "about": "Leader epochs of this segment.", + "fields": [ + { + "name": "LeaderEpoch", + "type": "int32", + "versions": "0+", + "about": "Leader epoch" + }, + { + "name": "Offset", + "type": "int64", + "versions": "0+", + "about": "Start offset for the leader epoch" + } + ] + }, + { + "name": "SegmentSizeInBytes", + "type": "int32", + "versions": "0+", + "about": "Segment size in bytes" + }, + { + "name": "RemoteLogSegmentState", + "type": "int8", + "versions": "0+", + "about": "State of the remote log segment" + } + ] +} \ No newline at end of file diff --git a/storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json b/storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json new file mode 100644 index 0000000000000..24003dcbce849 --- /dev/null +++ b/storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 1, + "type": "metadata", + "name": "RemoteLogSegmentMetadataUpdateRecord", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { + "name": "RemoteLogSegmentId", + "type": "RemoteLogSegmentIdEntry", + "versions": "0+", + "about": "Unique representation of the remote log segment.", + "fields": [ + { + "name": "TopicIdPartition", + "type": "TopicIdPartitionEntry", + "versions": "0+", + "about": "Represents unique topic partition.", + "fields": [ + { + "name": "Name", + "type": "string", + "versions": "0+", + "about": "Topic name." + }, + { + "name": "Id", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the topic." + }, + { + "name": "Partition", + "type": "int32", + "versions": "0+", + "about": "Partition number." + } + ] + }, + { + "name": "Id", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the remote log segment." + } + ] + }, + { + "name": "BrokerId", + "type": "int32", + "versions": "0+", + "about": "Broker id from which this event is generated." + }, + { + "name": "EventTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Epoch time in milli seconds at which this event is generated." + }, + { + "name": "RemoteLogSegmentState", + "type": "int8", + "versions": "0+", + "about": "State identifier of the remote log segment, which is RemoteLogSegmentState.id()." 
+ } + ] +} \ No newline at end of file diff --git a/storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json b/storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json new file mode 100644 index 0000000000000..f5e955bceaca8 --- /dev/null +++ b/storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 2, + "type": "metadata", + "name": "RemotePartitionDeleteMetadataRecord", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { + "name": "TopicIdPartition", + "type": "TopicIdPartitionEntry", + "versions": "0+", + "about": "Represents unique topic partition.", + "fields": [ + { + "name": "Name", + "type": "string", + "versions": "0+", + "about": "Topic name." + }, + { + "name": "Id", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the topic." + }, + { + "name": "Partition", + "type": "int32", + "versions": "0+", + "about": "Partition number." + } + ] + }, + { + "name": "BrokerId", + "type": "int32", + "versions": "0+", + "about": "Broker (controller or leader) id from which this event is created. DELETE_PARTITION_MARKED is sent by the controller. DELETE_PARTITION_STARTED and DELETE_PARTITION_FINISHED are sent by remote log metadata topic partition leader." + }, + { + "name": "EventTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Epoch time in milli seconds at which this event is generated." + }, + { + "name": "RemotePartitionDeleteState", + "type": "int8", + "versions": "0+", + "about": "Deletion state identifier of the remote partition, which is RemotePartitionDeleteState.id()." + } + ] +} \ No newline at end of file diff --git a/storage/bin/test/log4j.properties b/storage/bin/test/log4j.properties new file mode 100644 index 0000000000000..113e15e22f633 --- /dev/null +++ b/storage/bin/test/log4j.properties @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
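For readability, here is a hypothetical Scala mirror of the `RemoteLogSegmentMetadataRecord` schema above. The real classes are generated from these JSON definitions by Kafka's message generator, so this case class exists only to show the field layout; `java.util.UUID` stands in for the schema's `uuid` type.

```scala
import java.util.UUID

// Field names and nesting follow the JSON schema above; illustration only.
final case class TopicIdPartitionEntry(name: String, id: UUID, partition: Int)
final case class SegmentLeaderEpochEntry(leaderEpoch: Int, offset: Long)

final case class RemoteLogSegmentMetadataRecord(
  topicIdPartition: TopicIdPartitionEntry,
  segmentId: UUID,                        // RemoteLogSegmentId.Id
  startOffset: Long,
  endOffset: Long,
  brokerId: Int,
  maxTimestampMs: Long,
  eventTimestampMs: Long,
  segmentLeaderEpochs: Seq[SegmentLeaderEpochEntry],
  segmentSizeInBytes: Int,
  remoteLogSegmentState: Byte             // RemoteLogSegmentState.id()
)
```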
+log4j.rootLogger=OFF, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n + +log4j.logger.org.apache.kafka.server.log.remote.storage=INFO +log4j.logger.org.apache.kafka.server.log.remote.metadata.storage=INFO diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala new file mode 100644 index 0000000000000..c3c6403dd39a0 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import org.apache.kafka.streams.KeyValue +import org.apache.kafka.streams.kstream._ +import scala.jdk.CollectionConverters._ +import java.lang.{Iterable => JIterable} + +import org.apache.kafka.streams.processor.ProcessorContext + +/** + * Implicit classes that offer conversions of Scala function literals to + * SAM (Single Abstract Method) objects in Java. These make the Scala APIs much + * more expressive, with less boilerplate and more succinct. 
+ */ +private[scala] object FunctionsCompatConversions { + + implicit class ForeachActionFromFunction[K, V](val p: (K, V) => Unit) extends AnyVal { + def asForeachAction: ForeachAction[K, V] = (key: K, value: V) => p(key, value) + } + + implicit class PredicateFromFunction[K, V](val p: (K, V) => Boolean) extends AnyVal { + def asPredicate: Predicate[K, V] = (key: K, value: V) => p(key, value) + } + + implicit class MapperFromFunction[T, U, VR](val f: (T, U) => VR) extends AnyVal { + def asKeyValueMapper: KeyValueMapper[T, U, VR] = (key: T, value: U) => f(key, value) + def asValueJoiner: ValueJoiner[T, U, VR] = (value1: T, value2: U) => f(value1, value2) + } + + implicit class KeyValueMapperFromFunction[K, V, KR, VR](val f: (K, V) => (KR, VR)) extends AnyVal { + def asKeyValueMapper: KeyValueMapper[K, V, KeyValue[KR, VR]] = (key: K, value: V) => { + val (kr, vr) = f(key, value) + KeyValue.pair(kr, vr) + } + } + + implicit class FunctionFromFunction[V, VR](val f: V => VR) extends AnyVal { + def asJavaFunction: java.util.function.Function[V, VR] = (value: V) => f(value) + } + + implicit class ValueMapperFromFunction[V, VR](val f: V => VR) extends AnyVal { + def asValueMapper: ValueMapper[V, VR] = (value: V) => f(value) + } + + implicit class FlatValueMapperFromFunction[V, VR](val f: V => Iterable[VR]) extends AnyVal { + def asValueMapper: ValueMapper[V, JIterable[VR]] = (value: V) => f(value).asJava + } + + implicit class ValueMapperWithKeyFromFunction[K, V, VR](val f: (K, V) => VR) extends AnyVal { + def asValueMapperWithKey: ValueMapperWithKey[K, V, VR] = (readOnlyKey: K, value: V) => f(readOnlyKey, value) + } + + implicit class FlatValueMapperWithKeyFromFunction[K, V, VR](val f: (K, V) => Iterable[VR]) extends AnyVal { + def asValueMapperWithKey: ValueMapperWithKey[K, V, JIterable[VR]] = + (readOnlyKey: K, value: V) => f(readOnlyKey, value).asJava + } + + implicit class AggregatorFromFunction[K, V, VA](val f: (K, V, VA) => VA) extends AnyVal { + def asAggregator: Aggregator[K, V, VA] = (key: K, value: V, aggregate: VA) => f(key, value, aggregate) + } + + implicit class MergerFromFunction[K, VR](val f: (K, VR, VR) => VR) extends AnyVal { + def asMerger: Merger[K, VR] = (aggKey: K, aggOne: VR, aggTwo: VR) => f(aggKey, aggOne, aggTwo) + } + + implicit class ReducerFromFunction[V](val f: (V, V) => V) extends AnyVal { + def asReducer: Reducer[V] = (value1: V, value2: V) => f(value1, value2) + } + + implicit class InitializerFromFunction[VA](val f: () => VA) extends AnyVal { + def asInitializer: Initializer[VA] = () => f() + } + + implicit class TransformerSupplierFromFunction[K, V, VO](val f: () => Transformer[K, V, VO]) extends AnyVal { + def asTransformerSupplier: TransformerSupplier[K, V, VO] = () => f() + } + + implicit class TransformerSupplierAsJava[K, V, VO](val supplier: TransformerSupplier[K, V, Iterable[VO]]) + extends AnyVal { + def asJava: TransformerSupplier[K, V, JIterable[VO]] = () => { + val innerTransformer = supplier.get() + new Transformer[K, V, JIterable[VO]] { + override def transform(key: K, value: V): JIterable[VO] = innerTransformer.transform(key, value).asJava + override def init(context: ProcessorContext): Unit = innerTransformer.init(context) + override def close(): Unit = innerTransformer.close() + } + } + } + implicit class ValueTransformerSupplierAsJava[V, VO](val supplier: ValueTransformerSupplier[V, Iterable[VO]]) + extends AnyVal { + def asJava: ValueTransformerSupplier[V, JIterable[VO]] = () => { + val innerTransformer = supplier.get() + new ValueTransformer[V, 
JIterable[VO]] { + override def transform(value: V): JIterable[VO] = innerTransformer.transform(value).asJava + override def init(context: ProcessorContext): Unit = innerTransformer.init(context) + override def close(): Unit = innerTransformer.close() + } + } + } + implicit class ValueTransformerSupplierWithKeyAsJava[K, V, VO]( + val supplier: ValueTransformerWithKeySupplier[K, V, Iterable[VO]] + ) extends AnyVal { + def asJava: ValueTransformerWithKeySupplier[K, V, JIterable[VO]] = () => { + val innerTransformer = supplier.get() + new ValueTransformerWithKey[K, V, JIterable[VO]] { + override def transform(key: K, value: V): JIterable[VO] = innerTransformer.transform(key, value).asJava + override def init(context: ProcessorContext): Unit = innerTransformer.init(context) + override def close(): Unit = innerTransformer.close() + } + } + } +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala new file mode 100644 index 0000000000000..5f7064be14eb0 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.KeyValue +import org.apache.kafka.streams.kstream.{ + KStream => KStreamJ, + KGroupedStream => KGroupedStreamJ, + TimeWindowedKStream => TimeWindowedKStreamJ, + SessionWindowedKStream => SessionWindowedKStreamJ, + CogroupedKStream => CogroupedKStreamJ, + TimeWindowedCogroupedKStream => TimeWindowedCogroupedKStreamJ, + SessionWindowedCogroupedKStream => SessionWindowedCogroupedKStreamJ, + KTable => KTableJ, + KGroupedTable => KGroupedTableJ +} +import org.apache.kafka.streams.processor.StateStore +import org.apache.kafka.streams.scala.kstream._ + +/** + * Implicit conversions between the Scala wrapper objects and the underlying Java + * objects. 
+ */ +object ImplicitConversions { + + implicit def wrapKStream[K, V](inner: KStreamJ[K, V]): KStream[K, V] = + new KStream[K, V](inner) + + implicit def wrapKGroupedStream[K, V](inner: KGroupedStreamJ[K, V]): KGroupedStream[K, V] = + new KGroupedStream[K, V](inner) + + implicit def wrapTimeWindowedKStream[K, V](inner: TimeWindowedKStreamJ[K, V]): TimeWindowedKStream[K, V] = + new TimeWindowedKStream[K, V](inner) + + implicit def wrapSessionWindowedKStream[K, V](inner: SessionWindowedKStreamJ[K, V]): SessionWindowedKStream[K, V] = + new SessionWindowedKStream[K, V](inner) + + implicit def wrapCogroupedKStream[K, V](inner: CogroupedKStreamJ[K, V]): CogroupedKStream[K, V] = + new CogroupedKStream[K, V](inner) + + implicit def wrapTimeWindowedCogroupedKStream[K, V]( + inner: TimeWindowedCogroupedKStreamJ[K, V] + ): TimeWindowedCogroupedKStream[K, V] = + new TimeWindowedCogroupedKStream[K, V](inner) + + implicit def wrapSessionWindowedCogroupedKStream[K, V]( + inner: SessionWindowedCogroupedKStreamJ[K, V] + ): SessionWindowedCogroupedKStream[K, V] = + new SessionWindowedCogroupedKStream[K, V](inner) + + implicit def wrapKTable[K, V](inner: KTableJ[K, V]): KTable[K, V] = + new KTable[K, V](inner) + + implicit def wrapKGroupedTable[K, V](inner: KGroupedTableJ[K, V]): KGroupedTable[K, V] = + new KGroupedTable[K, V](inner) + + implicit def tuple2ToKeyValue[K, V](tuple: (K, V)): KeyValue[K, V] = new KeyValue(tuple._1, tuple._2) + + // we would also like to allow users implicit serdes + // and these implicits will convert them to `Grouped`, `Produced` or `Consumed` + + implicit def consumedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Consumed[K, V] = + Consumed.`with`[K, V] + + implicit def groupedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Grouped[K, V] = + Grouped.`with`[K, V] + + implicit def joinedFromKeyValueOtherSerde[K, V, VO](implicit + keySerde: Serde[K], + valueSerde: Serde[V], + otherValueSerde: Serde[VO] + ): Joined[K, V, VO] = + Joined.`with`[K, V, VO] + + implicit def materializedFromSerde[K, V, S <: StateStore](implicit + keySerde: Serde[K], + valueSerde: Serde[V] + ): Materialized[K, V, S] = + Materialized.`with`[K, V, S] + + implicit def producedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Produced[K, V] = + Produced.`with`[K, V] + + implicit def repartitionedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Repartitioned[K, V] = + Repartitioned.`with`[K, V] + + implicit def streamJoinFromKeyValueOtherSerde[K, V, VO](implicit + keySerde: Serde[K], + valueSerde: Serde[V], + otherValueSerde: Serde[VO] + ): StreamJoined[K, V, VO] = + StreamJoined.`with`[K, V, VO] +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/Serdes.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/Serdes.scala new file mode 100644 index 0000000000000..2e42090d13deb --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/Serdes.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import java.util + +import org.apache.kafka.common.serialization.{Deserializer, Serde, Serdes => JSerdes, Serializer} +import org.apache.kafka.streams.kstream.WindowedSerdes + +@deprecated( + "Use org.apache.kafka.streams.scala.serialization.Serdes. For WindowedSerdes.TimeWindowedSerde, use explicit constructors.", + "2.7.0" +) +object Serdes { + implicit def String: Serde[String] = JSerdes.String() + implicit def Long: Serde[Long] = JSerdes.Long().asInstanceOf[Serde[Long]] + implicit def JavaLong: Serde[java.lang.Long] = JSerdes.Long() + implicit def ByteArray: Serde[Array[Byte]] = JSerdes.ByteArray() + implicit def Bytes: Serde[org.apache.kafka.common.utils.Bytes] = JSerdes.Bytes() + implicit def Float: Serde[Float] = JSerdes.Float().asInstanceOf[Serde[Float]] + implicit def JavaFloat: Serde[java.lang.Float] = JSerdes.Float() + implicit def Double: Serde[Double] = JSerdes.Double().asInstanceOf[Serde[Double]] + implicit def JavaDouble: Serde[java.lang.Double] = JSerdes.Double() + implicit def Integer: Serde[Int] = JSerdes.Integer().asInstanceOf[Serde[Int]] + implicit def JavaInteger: Serde[java.lang.Integer] = JSerdes.Integer() + + implicit def timeWindowedSerde[T](implicit tSerde: Serde[T]): WindowedSerdes.TimeWindowedSerde[T] = + new WindowedSerdes.TimeWindowedSerde[T](tSerde) + + implicit def sessionWindowedSerde[T](implicit tSerde: Serde[T]): WindowedSerdes.SessionWindowedSerde[T] = + new WindowedSerdes.SessionWindowedSerde[T](tSerde) + + def fromFn[T >: Null](serializer: T => Array[Byte], deserializer: Array[Byte] => Option[T]): Serde[T] = + JSerdes.serdeFrom( + new Serializer[T] { + override def serialize(topic: String, data: T): Array[Byte] = serializer(data) + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + }, + new Deserializer[T] { + override def deserialize(topic: String, data: Array[Byte]): T = deserializer(data).orNull + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + } + ) + + def fromFn[T >: Null]( + serializer: (String, T) => Array[Byte], + deserializer: (String, Array[Byte]) => Option[T] + ): Serde[T] = + JSerdes.serdeFrom( + new Serializer[T] { + override def serialize(topic: String, data: T): Array[Byte] = serializer(topic, data) + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + }, + new Deserializer[T] { + override def deserialize(topic: String, data: Array[Byte]): T = deserializer(topic, data).orNull + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + } + ) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala new file mode 100644 index 0000000000000..9430a511f71a4 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import java.util.Properties +import java.util.regex.Pattern + +import org.apache.kafka.streams.kstream.GlobalKTable +import org.apache.kafka.streams.processor.{ProcessorSupplier, StateStore} +import org.apache.kafka.streams.state.StoreBuilder +import org.apache.kafka.streams.{StreamsBuilder => StreamsBuilderJ, Topology} +import org.apache.kafka.streams.scala.kstream.{Consumed, KStream, KTable, Materialized} + +import scala.jdk.CollectionConverters._ + +/** + * Wraps the Java class StreamsBuilder and delegates method calls to the underlying Java object. + */ +class StreamsBuilder(inner: StreamsBuilderJ = new StreamsBuilderJ) { + + /** + * Create a [[kstream.KStream]] from the specified topic. + *
<p>
+ * The `implicit Consumed` instance provides the values of `auto.offset.reset` strategy, `TimestampExtractor`, + * key and value deserializers, etc. If the implicit is not found in scope, a compiler error will result. + *
<p>
+ * A convenient alternative is to have the necessary implicit serdes in scope, which will be implicitly + * converted to generate an instance of `Consumed`. @see [[ImplicitConversions]]. + * {{{ + * // Brings all implicit conversions in scope + * import ImplicitConversions._ + * + * // Bring implicit default serdes in scope + * import Serdes._ + * + * val builder = new StreamsBuilder() + * + * // stream function gets the implicit Consumed which is constructed automatically + * // from the serdes through the implicits in ImplicitConversions#consumedFromSerde + * val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) + * }}} + * + * @param topic the topic name + * @return a [[kstream.KStream]] for the specified topic + */ + def stream[K, V](topic: String)(implicit consumed: Consumed[K, V]): KStream[K, V] = + new KStream(inner.stream[K, V](topic, consumed)) + + /** + * Create a [[kstream.KStream]] from the specified topics. + * + * @param topics the topic names + * @return a [[kstream.KStream]] for the specified topics + * @see #stream(String) + * @see `org.apache.kafka.streams.StreamsBuilder#stream` + */ + def stream[K, V](topics: Set[String])(implicit consumed: Consumed[K, V]): KStream[K, V] = + new KStream(inner.stream[K, V](topics.asJava, consumed)) + + /** + * Create a [[kstream.KStream]] from the specified topic pattern. + * + * @param topicPattern the topic name pattern + * @return a [[kstream.KStream]] for the specified topics + * @see #stream(String) + * @see `org.apache.kafka.streams.StreamsBuilder#stream` + */ + def stream[K, V](topicPattern: Pattern)(implicit consumed: Consumed[K, V]): KStream[K, V] = + new KStream(inner.stream[K, V](topicPattern, consumed)) + + /** + * Create a [[kstream.KTable]] from the specified topic. + *
<p>
+ * The `implicit Consumed` instance provides the values of `auto.offset.reset` strategy, `TimestampExtractor`, + * key and value deserializers, etc. If the implicit is not found in scope, a compiler error will result. + *
<p>
+ * A convenient alternative is to have the necessary implicit serdes in scope, which will be implicitly + * converted to generate an instance of `Consumed`. @see [[ImplicitConversions]]. + * {{{ + * // Brings all implicit conversions in scope + * import ImplicitConversions._ + * + * // Bring implicit default serdes in scope + * import Serdes._ + * + * val builder = new StreamsBuilder() + * + * // stream function gets the implicit Consumed which is constructed automatically + * // from the serdes through the implicits in ImplicitConversions#consumedFromSerde + * val userClicksStream: KTable[String, Long] = builder.table(userClicksTopic) + * }}} + * + * @param topic the topic name + * @return a [[kstream.KTable]] for the specified topic + * @see `org.apache.kafka.streams.StreamsBuilder#table` + */ + def table[K, V](topic: String)(implicit consumed: Consumed[K, V]): KTable[K, V] = + new KTable(inner.table[K, V](topic, consumed)) + + /** + * Create a [[kstream.KTable]] from the specified topic. + * + * @param topic the topic name + * @param materialized the instance of `Materialized` used to materialize a state store + * @return a [[kstream.KTable]] for the specified topic + * @see #table(String) + * @see `org.apache.kafka.streams.StreamsBuilder#table` + */ + def table[K, V](topic: String, materialized: Materialized[K, V, ByteArrayKeyValueStore])(implicit + consumed: Consumed[K, V] + ): KTable[K, V] = + new KTable(inner.table[K, V](topic, consumed, materialized)) + + /** + * Create a `GlobalKTable` from the specified topic. The serializers from the implicit `Consumed` + * instance will be used. Input records with `null` key will be dropped. + * + * @param topic the topic name + * @return a `GlobalKTable` for the specified topic + * @see `org.apache.kafka.streams.StreamsBuilder#globalTable` + */ + def globalTable[K, V](topic: String)(implicit consumed: Consumed[K, V]): GlobalKTable[K, V] = + inner.globalTable(topic, consumed) + + /** + * Create a `GlobalKTable` from the specified topic. The resulting `GlobalKTable` will be materialized + * in a local `KeyValueStore` configured with the provided instance of `Materialized`. The serializers + * from the implicit `Consumed` instance will be used. + * + * @param topic the topic name + * @param materialized the instance of `Materialized` used to materialize a state store + * @return a `GlobalKTable` for the specified topic + * @see `org.apache.kafka.streams.StreamsBuilder#globalTable` + */ + def globalTable[K, V](topic: String, materialized: Materialized[K, V, ByteArrayKeyValueStore])(implicit + consumed: Consumed[K, V] + ): GlobalKTable[K, V] = + inner.globalTable(topic, consumed, materialized) + + /** + * Adds a state store to the underlying `Topology`. The store must still be "connected" to a `Processor`, + * `Transformer`, or `ValueTransformer` before it can be used. + *
<p>
+ * It is required to connect state stores to `Processor`, `Transformer`, or `ValueTransformer` before they can be used. + * + * @param builder the builder used to obtain this state store `StateStore` instance + * @return the underlying Java abstraction `StreamsBuilder` after adding the `StateStore` + * @throws org.apache.kafka.streams.errors.TopologyException if state store supplier is already added + * @see `org.apache.kafka.streams.StreamsBuilder#addStateStore` + */ + def addStateStore(builder: StoreBuilder[_ <: StateStore]): StreamsBuilderJ = inner.addStateStore(builder) + + /** + * Adds a global `StateStore` to the topology. Global stores should not be added to `Processor`, `Transformer`, + * or `ValueTransformer` (in contrast to regular stores). + *
<p>
+ * It is not required to connect a global store to `Processor`, `Transformer`, or `ValueTransformer`; + * those have read-only access to all global stores by default. + * + * @see `org.apache.kafka.streams.StreamsBuilder#addGlobalStore` + */ + @deprecated( + "Use #addGlobalStore(StoreBuilder, String, Consumed, org.apache.kafka.streams.processor.api.ProcessorSupplier) instead.", + "2.7.0" + ) + def addGlobalStore[K, V]( + storeBuilder: StoreBuilder[_ <: StateStore], + topic: String, + consumed: Consumed[K, V], + stateUpdateSupplier: ProcessorSupplier[K, V] + ): StreamsBuilderJ = + inner.addGlobalStore(storeBuilder, topic, consumed, stateUpdateSupplier) + + /** + * Adds a global `StateStore` to the topology. Global stores should not be added to `Processor`, `Transformer`, + * or `ValueTransformer` (in contrast to regular stores). + *
<p>
+ * It is not required to connect a global store to `Processor`, `Transformer`, or `ValueTransformer`; + * those have read-only access to all global stores by default. + * + * @see `org.apache.kafka.streams.StreamsBuilder#addGlobalStore` + */ + def addGlobalStore[K, V]( + storeBuilder: StoreBuilder[_ <: StateStore], + topic: String, + consumed: Consumed[K, V], + stateUpdateSupplier: org.apache.kafka.streams.processor.api.ProcessorSupplier[K, V, Void, Void] + ): StreamsBuilderJ = + inner.addGlobalStore(storeBuilder, topic, consumed, stateUpdateSupplier) + + def build(): Topology = inner.build() + + /** + * Returns the `Topology` that represents the specified processing logic and accepts + * a `Properties` instance used to indicate whether to optimize topology or not. + * + * @param props the `Properties` used for building possibly optimized topology + * @return the `Topology` that represents the specified processing logic + * @see `org.apache.kafka.streams.StreamsBuilder#build` + */ + def build(props: Properties): Topology = inner.build(props) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala new file mode 100644 index 0000000000000..6ac13710d4b70 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.{Branched => BranchedJ, KStream => KStreamJ} + +object Branched { + + /** + * Create an instance of `Branched` with provided branch name suffix. + * + * @param name the branch name suffix to be used (see [[BranchedKStream]] description for details) + * @tparam K key type + * @tparam V value type + * @return a new instance of `Branched` + */ + def as[K, V](name: String): BranchedJ[K, V] = + BranchedJ.as[K, V](name) + + /** + * Create an instance of `Branched` with provided chain function and branch name suffix. + * + * @param chain A function that will be applied to the branch. If the provided function returns + * `null`, its result is ignored, otherwise it is added to the Map returned + * by [[BranchedKStream.defaultBranch]] or [[BranchedKStream.noDefaultBranch]] (see + * [[BranchedKStream]] description for details). + * @param name the branch name suffix to be used. 
If `null`, a default branch name suffix will be generated + * (see [[BranchedKStream]] description for details) + * @tparam K key type + * @tparam V value type + * @return a new instance of `Branched` + * @see `org.apache.kafka.streams.kstream.Branched#withFunction(java.util.function.Function, java.lang.String)` + */ + def withFunction[K, V](chain: KStream[K, V] => KStream[K, V], name: String = null): BranchedJ[K, V] = + BranchedJ.withFunction((f: KStreamJ[K, V]) => chain.apply(new KStream[K, V](f)).inner, name) + + /** + * Create an instance of `Branched` with provided chain consumer and branch name suffix. + * + * @param chain A consumer to which the branch will be sent. If a non-null consumer is provided here, + * the respective branch will not be added to the resulting Map returned + * by [[BranchedKStream.defaultBranch]] or [[BranchedKStream.noDefaultBranch]] (see + * [[BranchedKStream]] description for details). + * @param name the branch name suffix to be used. If `null`, a default branch name suffix will be generated + * (see [[BranchedKStream]] description for details) + * @tparam K key type + * @tparam V value type + * @return a new instance of `Branched` + * @see `org.apache.kafka.streams.kstream.Branched#withConsumer(java.util.function.Consumer, java.lang.String)` + */ + def withConsumer[K, V](chain: KStream[K, V] => Unit, name: String = null): BranchedJ[K, V] = + BranchedJ.withConsumer((c: KStreamJ[K, V]) => chain.apply(new KStream[K, V](c)), name) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala new file mode 100644 index 0000000000000..c606c0096898c --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import java.util + +import org.apache.kafka.streams.kstream +import org.apache.kafka.streams.kstream.{BranchedKStream => BranchedKStreamJ} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.PredicateFromFunction + +import scala.jdk.CollectionConverters._ + +/** + * Branches the records in the original stream based on the predicates supplied for the branch definitions. + *
<p>
+ * Branches are defined with [[branch]] or [[defaultBranch]] methods. Each record is evaluated against the predicates + * supplied via the [[branch]] calls, and is routed to the first branch for which its respective predicate + * evaluates to `true`. If a record does not match any predicates, it will be routed to the default branch, + * or dropped if no default branch is created. + *
<p>
+ * + * Each branch (which is a [[KStream]] instance) can then be processed either by + * a function or a consumer provided via a [[Branched]] + * parameter. If certain conditions are met, it can also be accessed from the `Map` returned by + * an optional [[defaultBranch]] or [[noDefaultBranch]] method call. + *
<p>
+ * The branching happens on a first-match basis: a record in the original stream is assigned to the corresponding result + * stream for the first predicate that evaluates to `true`, and is assigned to this stream only. If you need + * to route a record to multiple streams, you can apply multiple [[KStream.filter]] operators to the same [[KStream]] + * instance, one for each predicate, instead of branching. + *
<p>
+ * The process of routing the records to different branches is a stateless record-by-record operation. + * + * @tparam K Type of keys + * @tparam V Type of values + */ +class BranchedKStream[K, V](val inner: BranchedKStreamJ[K, V]) { + + /** + * Define a branch for records that match the predicate. + * + * @param predicate A predicate against which each record will be evaluated. + * If this predicate returns `true` for a given record, the record will be + * routed to the current branch and will not be evaluated against the predicates + * for the remaining branches. + * @return `this` to facilitate method chaining + */ + def branch(predicate: (K, V) => Boolean): BranchedKStream[K, V] = { + inner.branch(predicate.asPredicate) + this + } + + /** + * Define a branch for records that match the predicate. + * + * @param predicate A predicate against which each record will be evaluated. + * If this predicate returns `true` for a given record, the record will be + * routed to the current branch and will not be evaluated against the predicates + * for the remaining branches. + * @param branched A [[Branched]] parameter, that allows to define a branch name, an in-place + * branch consumer or branch mapper (see code examples + * for [[BranchedKStream]]) + * @return `this` to facilitate method chaining + */ + def branch(predicate: (K, V) => Boolean, branched: Branched[K, V]): BranchedKStream[K, V] = { + inner.branch(predicate.asPredicate, branched) + this + } + + /** + * Finalize the construction of branches and defines the default branch for the messages not intercepted + * by other branches. Calling [[defaultBranch]] or [[noDefaultBranch]] is optional. + * + * @return Map of named branches. For rules of forming the resulting map, see [[BranchedKStream]] + * description. + */ + def defaultBranch(): Map[String, KStream[K, V]] = toScalaMap(inner.defaultBranch()) + + /** + * Finalize the construction of branches and defines the default branch for the messages not intercepted + * by other branches. Calling [[defaultBranch]] or [[noDefaultBranch]] is optional. + * + * @param branched A [[Branched]] parameter, that allows to define a branch name, an in-place + * branch consumer or branch mapper for [[BranchedKStream]]. + * @return Map of named branches. For rules of forming the resulting map, see [[BranchedKStream]] + * description. + */ + def defaultBranch(branched: Branched[K, V]): Map[String, KStream[K, V]] = toScalaMap(inner.defaultBranch(branched)) + + /** + * Finalizes the construction of branches without forming a default branch. + * + * @return Map of named branches. For rules of forming the resulting map, see [[BranchedKStream]] + * description. + */ + def noDefaultBranch(): Map[String, KStream[K, V]] = toScalaMap(inner.noDefaultBranch()) + + private def toScalaMap(m: util.Map[String, kstream.KStream[K, V]]): collection.immutable.Map[String, KStream[K, V]] = + m.asScala.map { case (name, kStreamJ) => + (name, new KStream(kStreamJ)) + }.toMap +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala new file mode 100644 index 0000000000000..2bf58ca0e5670 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
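A rough usage sketch for the `Branched`/`BranchedKStream` wrappers above. It assumes the matching `KStream.split` entry point from the same Scala DSL plus the implicit serdes and conversions shown earlier; the topic names, thresholds, and branch names are invented for illustration:

```scala
import org.apache.kafka.streams.kstream.Named
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.kstream.{Branched, KStream}

object BranchingSketch extends App {
  val builder = new StreamsBuilder()
  val orders: KStream[String, Long] = builder.stream[String, Long]("orders")

  // First-match routing: each record lands in exactly one branch.
  val branches: Map[String, KStream[String, Long]] =
    orders
      .split(Named.as("split-"))
      .branch((_, value) => value > 1000L, Branched.as("large"))
      .branch((_, value) => value > 100L, Branched.withConsumer[String, Long](ks => ks.to("medium-orders")))
      .defaultBranch(Branched.as("rest"))

  // Branches defined with Branched.as are keyed by the split prefix plus the suffix.
  branches("split-large").to("large-orders")
}
```

The branch handled with `Branched.withConsumer` is written out directly and, per the `withConsumer` contract above, does not appear in the returned map.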
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.{ + SessionWindows, + SlidingWindows, + Window, + Windows, + CogroupedKStream => CogroupedKStreamJ +} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{AggregatorFromFunction, InitializerFromFunction} + +/** + * Wraps the Java class CogroupedKStream and delegates method calls to the underlying Java object. + * + * @tparam KIn Type of keys + * @tparam VOut Type of values + * @param inner The underlying Java abstraction for CogroupedKStream + * @see `org.apache.kafka.streams.kstream.CogroupedKStream` + */ +class CogroupedKStream[KIn, VOut](val inner: CogroupedKStreamJ[KIn, VOut]) { + + /** + * Add an already [[KGroupedStream]] to this [[CogroupedKStream]]. + * + * @param groupedStream a group stream + * @param aggregator a function that computes a new aggregate result + * @return a [[CogroupedKStream]] + */ + def cogroup[VIn]( + groupedStream: KGroupedStream[KIn, VIn], + aggregator: (KIn, VIn, VOut) => VOut + ): CogroupedKStream[KIn, VOut] = + new CogroupedKStream(inner.cogroup(groupedStream.inner, aggregator.asAggregator)) + + /** + * Aggregate the values of records in these streams by the grouped key and defined window. + * + * @param initializer an `Initializer` that computes an initial intermediate aggregation result. + * Cannot be { @code null}. + * @param materialized an instance of `Materialized` used to materialize a state store. + * Cannot be { @code null}. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest + * (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.CogroupedKStream#aggregate` + */ + def aggregate(initializer: => VOut)(implicit + materialized: Materialized[KIn, VOut, ByteArrayKeyValueStore] + ): KTable[KIn, VOut] = new KTable(inner.aggregate((() => initializer).asInitializer, materialized)) + + /** + * Aggregate the values of records in these streams by the grouped key and defined window. + * + * @param initializer an `Initializer` that computes an initial intermediate aggregation result. + * Cannot be { @code null}. + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * Cannot be { @code null}. 
+ * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest + * (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.CogroupedKStream#aggregate` + */ + def aggregate(initializer: => VOut, named: Named)(implicit + materialized: Materialized[KIn, VOut, ByteArrayKeyValueStore] + ): KTable[KIn, VOut] = new KTable(inner.aggregate((() => initializer).asInitializer, named, materialized)) + + /** + * Create a new [[TimeWindowedCogroupedKStream]] instance that can be used to perform windowed aggregations. + * + * @param windows the specification of the aggregation `Windows` + * @return an instance of [[TimeWindowedCogroupedKStream]] + * @see `org.apache.kafka.streams.kstream.CogroupedKStream#windowedBy` + */ + def windowedBy[W <: Window](windows: Windows[W]): TimeWindowedCogroupedKStream[KIn, VOut] = + new TimeWindowedCogroupedKStream(inner.windowedBy(windows)) + + /** + * Create a new [[TimeWindowedCogroupedKStream]] instance that can be used to perform sliding windowed aggregations. + * + * @param windows the specification of the aggregation `SlidingWindows` + * @return an instance of [[TimeWindowedCogroupedKStream]] + * @see `org.apache.kafka.streams.kstream.CogroupedKStream#windowedBy` + */ + def windowedBy(windows: SlidingWindows): TimeWindowedCogroupedKStream[KIn, VOut] = + new TimeWindowedCogroupedKStream(inner.windowedBy(windows)) + + /** + * Create a new [[SessionWindowedKStream]] instance that can be used to perform session windowed aggregations. + * + * @param windows the specification of the aggregation `SessionWindows` + * @return an instance of [[SessionWindowedKStream]] + * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` + */ + def windowedBy(windows: SessionWindows): SessionWindowedCogroupedKStream[KIn, VOut] = + new SessionWindowedCogroupedKStream(inner.windowedBy(windows)) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala new file mode 100644 index 0000000000000..714df97c17595 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Consumed => ConsumedJ} +import org.apache.kafka.streams.Topology +import org.apache.kafka.streams.processor.TimestampExtractor + +object Consumed { + + /** + * Create an instance of [[Consumed]] with the supplied arguments. `null` values are acceptable. 
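+ * For illustration only, a sketch of supplying a custom timestamp extractor and reset policy; the topic name
+ * and the surrounding `builder` are hypothetical, and implicit serdes are assumed to be in scope:
+ * {{{
+ *   import org.apache.kafka.streams.Topology
+ *   import org.apache.kafka.streams.processor.WallclockTimestampExtractor
+ *   import org.apache.kafka.streams.scala.serialization.Serdes._
+ *
+ *   implicit val consumed: Consumed[String, String] =
+ *     Consumed.`with`[String, String](new WallclockTimestampExtractor, Topology.AutoOffsetReset.EARLIEST)
+ *
+ *   val stream: KStream[String, String] = builder.stream[String, String]("input-topic")
+ * }}}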
+ * + * @tparam K key type + * @tparam V value type + * @param timestampExtractor the timestamp extractor to used. If `null` the default timestamp extractor from + * config will be used + * @param resetPolicy the offset reset policy to be used. If `null` the default reset policy from config + * will be used + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @return a new instance of [[Consumed]] + */ + def `with`[K, V]( + timestampExtractor: TimestampExtractor, + resetPolicy: Topology.AutoOffsetReset + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = + ConsumedJ.`with`(keySerde, valueSerde, timestampExtractor, resetPolicy) + + /** + * Create an instance of [[Consumed]] with key and value [[Serde]]s. + * + * @tparam K key type + * @tparam V value type + * @return a new instance of [[Consumed]] + */ + def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = + ConsumedJ.`with`(keySerde, valueSerde) + + /** + * Create an instance of [[Consumed]] with a [[TimestampExtractor]]. + * + * @param timestampExtractor the timestamp extractor to used. If `null` the default timestamp extractor from + * config will be used + * @tparam K key type + * @tparam V value type + * @return a new instance of [[Consumed]] + */ + def `with`[K, V]( + timestampExtractor: TimestampExtractor + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = + ConsumedJ.`with`(timestampExtractor).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Create an instance of [[Consumed]] with a [[Topology.AutoOffsetReset]]. + * + * @tparam K key type + * @tparam V value type + * @param resetPolicy the offset reset policy to be used. If `null` the default reset policy from config will be used + * @return a new instance of [[Consumed]] + */ + def `with`[K, V]( + resetPolicy: Topology.AutoOffsetReset + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = + ConsumedJ.`with`(resetPolicy).withKeySerde(keySerde).withValueSerde(valueSerde) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala new file mode 100644 index 0000000000000..03dde1617cf51 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Grouped => GroupedJ} + +object Grouped { + + /** + * Construct a `Grouped` instance with the provided key and value [[Serde]]s. 
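+ * For illustration only, a sketch of naming the repartition topic used by a `groupBy`; the stream and the
+ * topic name fragment below are hypothetical, and implicit serdes are assumed to be in scope:
+ * {{{
+ *   import org.apache.kafka.streams.scala.serialization.Serdes._
+ *
+ *   val words: KStream[String, String] = //..
+ *   val grouped: Grouped[String, String] = Grouped.`with`[String, String]("words-by-value")
+ *   val groupedStream: KGroupedStream[String, String] = words.groupBy((_, word) => word)(grouped)
+ * }}}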
+ * If the [[Serde]] params are `null` the default serdes defined in the configs will be used. + * + * @tparam K the key type + * @tparam V the value type + * @param keySerde keySerde that will be used to materialize a stream + * @param valueSerde valueSerde that will be used to materialize a stream + * @return a new instance of [[Grouped]] configured with the provided serdes + */ + def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): GroupedJ[K, V] = + GroupedJ.`with`(keySerde, valueSerde) + + /** + * Construct a `Grouped` instance with the provided key and value [[Serde]]s. + * If the [[Serde]] params are `null` the default serdes defined in the configs will be used. + * + * @tparam K the key type + * @tparam V the value type + * @param name the name used as part of a potential repartition topic + * @param keySerde keySerde that will be used to materialize a stream + * @param valueSerde valueSerde that will be used to materialize a stream + * @return a new instance of [[Grouped]] configured with the provided serdes + */ + def `with`[K, V](name: String)(implicit keySerde: Serde[K], valueSerde: Serde[V]): GroupedJ[K, V] = + GroupedJ.`with`(name, keySerde, valueSerde) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala new file mode 100644 index 0000000000000..c614e1488f8c5 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Joined => JoinedJ} + +object Joined { + + /** + * Create an instance of [[org.apache.kafka.streams.kstream.Joined]] with key, value, and otherValue [[Serde]] + * instances. + * `null` values are accepted and will be replaced by the default serdes as defined in config. + * + * @tparam K key type + * @tparam V value type + * @tparam VO other value type + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used + * @return new [[org.apache.kafka.streams.kstream.Joined]] instance with the provided serdes + */ + def `with`[K, V, VO](implicit + keySerde: Serde[K], + valueSerde: Serde[V], + otherValueSerde: Serde[VO] + ): JoinedJ[K, V, VO] = + JoinedJ.`with`(keySerde, valueSerde, otherValueSerde) + + /** + * Create an instance of [[org.apache.kafka.streams.kstream.Joined]] with key, value, and otherValue [[Serde]] + * instances. 
+ * `null` values are accepted and will be replaced by the default serdes as defined in config. + * + * @tparam K key type + * @tparam V value type + * @tparam VO other value type + * @param name name of possible repartition topic + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used + * @return new [[org.apache.kafka.streams.kstream.Joined]] instance with the provided serdes + */ + // disable spotless scala, which wants to make a mess of the argument lists + // format: off + def `with`[K, V, VO](name: String) + (implicit keySerde: Serde[K], + valueSerde: Serde[V], + otherValueSerde: Serde[VO]): JoinedJ[K, V, VO] = + JoinedJ.`with`(keySerde, valueSerde, otherValueSerde, name) + // format:on +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala new file mode 100644 index 0000000000000..60a9c572d16e7 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.internals.KTableImpl +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.kstream.{ + SessionWindows, + SlidingWindows, + Window, + Windows, + KGroupedStream => KGroupedStreamJ, + KTable => KTableJ +} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + AggregatorFromFunction, + InitializerFromFunction, + ReducerFromFunction, + ValueMapperFromFunction +} + +/** + * Wraps the Java class KGroupedStream and delegates method calls to the underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for KGroupedStream + * @see `org.apache.kafka.streams.kstream.KGroupedStream` + */ +class KGroupedStream[K, V](val inner: KGroupedStreamJ[K, V]) { + + /** + * Count the number of records in this stream by the grouped key. + * The result is written into a local `KeyValueStore` (which is basically an ever-updating materialized view) + * provided by the given `materialized`. + * + * @param materialized an instance of `Materialized` used to materialize a state store. 
+ * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#count` + */ + def count()(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { + val javaCountTable: KTableJ[K, java.lang.Long] = + inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[K, ByteArrayKeyValueStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[K, Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Count the number of records in this stream by the grouped key. + * The result is written into a local `KeyValueStore` (which is basically an ever-updating materialized view) + * provided by the given `materialized`. + * + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#count` + */ + def count(named: Named)(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { + val javaCountTable: KTableJ[K, java.lang.Long] = + inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[K, ByteArrayKeyValueStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[K, Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Combine the values of records in this stream by the grouped key. + * + * @param reducer a function `(V, V) => V` that computes a new aggregate result. + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#reduce` + */ + def reduce(reducer: (V, V) => V)(implicit materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = + new KTable(inner.reduce(reducer.asReducer, materialized)) + + /** + * Combine the values of records in this stream by the grouped key. + * + * @param reducer a function `(V, V) => V` that computes a new aggregate result. + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#reduce` + */ + def reduce(reducer: (V, V) => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArrayKeyValueStore] + ): KTable[K, V] = + new KTable(inner.reduce(reducer.asReducer, named, materialized)) + + /** + * Aggregate the values of records in this stream by the grouped key.
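+ * For illustration only, a running-total sketch; the purchases stream below is hypothetical, and implicit
+ * serdes (and hence the implicit `Materialized` instance) are assumed to be in scope:
+ * {{{
+ *   import org.apache.kafka.streams.scala.ImplicitConversions._
+ *   import org.apache.kafka.streams.scala.serialization.Serdes._
+ *
+ *   val purchases: KStream[String, Long] = //..
+ *   val totalsPerUser: KTable[String, Long] =
+ *     purchases
+ *       .groupByKey
+ *       .aggregate(0L)((_, amount, runningTotal) => runningTotal + amount)
+ * }}}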
+ * + * @param initializer an `Initializer` that computes an initial intermediate aggregation result + * @param aggregator an `Aggregator` that computes a new aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#aggregate` + */ + def aggregate[VR](initializer: => VR)(aggregator: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, materialized)) + + /** + * Aggregate the values of records in this stream by the grouped key. + * + * @param initializer an `Initializer` that computes an initial intermediate aggregation result + * @param aggregator an `Aggregator` that computes a new aggregate result + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#aggregate` + */ + def aggregate[VR](initializer: => VR, named: Named)(aggregator: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, named, materialized)) + + /** + * Create a new [[TimeWindowedKStream]] instance that can be used to perform windowed aggregations. + * + * @param windows the specification of the aggregation `Windows` + * @return an instance of [[TimeWindowedKStream]] + * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` + */ + def windowedBy[W <: Window](windows: Windows[W]): TimeWindowedKStream[K, V] = + new TimeWindowedKStream(inner.windowedBy(windows)) + + /** + * Create a new [[TimeWindowedKStream]] instance that can be used to perform sliding windowed aggregations. + * + * @param windows the specification of the aggregation `SlidingWindows` + * @return an instance of [[TimeWindowedKStream]] + * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` + */ + def windowedBy(windows: SlidingWindows): TimeWindowedKStream[K, V] = + new TimeWindowedKStream(inner.windowedBy(windows)) + + /** + * Create a new [[SessionWindowedKStream]] instance that can be used to perform session windowed aggregations. + * + * @param windows the specification of the aggregation `SessionWindows` + * @return an instance of [[SessionWindowedKStream]] + * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` + */ + def windowedBy(windows: SessionWindows): SessionWindowedKStream[K, V] = + new SessionWindowedKStream(inner.windowedBy(windows)) + + /** + * Create a new [[CogroupedKStream]] from this grouped KStream to allow cogrouping other [[KGroupedStream]] to it. 
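+ * For illustration only, a sketch that co-aggregates two hypothetical grouped streams into one table;
+ * the case class, the streams and a serde for the aggregate type are assumptions:
+ * {{{
+ *   case class Activity(clicks: Long, spend: Double)
+ *
+ *   val clicks: KGroupedStream[String, Long] = //..
+ *   val purchases: KGroupedStream[String, Double] = //..
+ *
+ *   val activity: KTable[String, Activity] =
+ *     clicks
+ *       .cogroup[Activity]((_, c, agg) => agg.copy(clicks = agg.clicks + c))
+ *       .cogroup(purchases, (_: String, amount: Double, agg: Activity) => agg.copy(spend = agg.spend + amount))
+ *       .aggregate(Activity(0L, 0.0)) // requires an implicit Materialized / serde for Activity
+ * }}}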
+ * + * @param aggregator an `Aggregator` that computes a new aggregate result + * @return an instance of [[CogroupedKStream]] + * @see `org.apache.kafka.streams.kstream.KGroupedStream#cogroup` + */ + def cogroup[VR](aggregator: (K, V, VR) => VR): CogroupedKStream[K, VR] = + new CogroupedKStream(inner.cogroup(aggregator.asAggregator)) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala new file mode 100644 index 0000000000000..3d9e052a2f17c --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.{KGroupedTable => KGroupedTableJ} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + AggregatorFromFunction, + InitializerFromFunction, + ReducerFromFunction +} + +/** + * Wraps the Java class KGroupedTable and delegates method calls to the underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for KGroupedTable + * @see `org.apache.kafka.streams.kstream.KGroupedTable` + */ +class KGroupedTable[K, V](inner: KGroupedTableJ[K, V]) { + + /** + * Count number of records of the original [[KTable]] that got [[KTable#groupBy]] to + * the same key into a new instance of [[KTable]]. + * + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#count` + */ + def count()(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { + val c: KTable[K, java.lang.Long] = + new KTable(inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]])) + c.mapValues[Long](Long2long _) + } + + /** + * Count number of records of the original [[KTable]] that got [[KTable#groupBy]] to + * the same key into a new instance of [[KTable]]. + * + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. 
+ * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#count` + */ + def count(named: Named)(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { + val c: KTable[K, java.lang.Long] = + new KTable(inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]])) + c.mapValues[Long](Long2long _) + } + + /** + * Combine the value of records of the original [[KTable]] that got [[KTable#groupBy]] + * to the same key into a new instance of [[KTable]]. + * + * @param adder a function that adds a new value to the aggregate result + * @param subtractor a function that removes an old value from the aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#reduce` + */ + def reduce(adder: (V, V) => V, subtractor: (V, V) => V)(implicit + materialized: Materialized[K, V, ByteArrayKeyValueStore] + ): KTable[K, V] = + new KTable(inner.reduce(adder.asReducer, subtractor.asReducer, materialized)) + + /** + * Combine the value of records of the original [[KTable]] that got [[KTable#groupBy]] + * to the same key into a new instance of [[KTable]]. + * + * @param adder a function that adds a new value to the aggregate result + * @param subtractor a function that removes an old value from the aggregate result + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#reduce` + */ + def reduce(adder: (V, V) => V, subtractor: (V, V) => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArrayKeyValueStore] + ): KTable[K, V] = + new KTable(inner.reduce(adder.asReducer, subtractor.asReducer, named, materialized)) + + /** + * Aggregate the value of records of the original [[KTable]] that got [[KTable#groupBy]] + * to the same key into a new instance of [[KTable]] using default serializers and deserializers. + * + * @param initializer a function that provides an initial aggregate result value + * @param adder a function that adds a new record to the aggregate result + * @param subtractor an aggregator function that removes an old record from the aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store.
+ * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#aggregate` + */ + def aggregate[VR](initializer: => VR)(adder: (K, V, VR) => VR, subtractor: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable( + inner.aggregate((() => initializer).asInitializer, adder.asAggregator, subtractor.asAggregator, materialized) + ) + + /** + * Aggregate the value of records of the original [[KTable]] that got [[KTable#groupBy]] + * to the same key into a new instance of [[KTable]] using default serializers and deserializers. + * + * @param initializer a function that provides an initial aggregate result value + * @param named a [[Named]] config used to name the processor in the topology + * @param adder a function that adds a new record to the aggregate result + * @param subtractor an aggregator function that removed an old record from the aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#aggregate` + */ + def aggregate[VR](initializer: => VR, named: Named)(adder: (K, V, VR) => VR, subtractor: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable( + inner.aggregate( + (() => initializer).asInitializer, + adder.asAggregator, + subtractor.asAggregator, + named, + materialized + ) + ) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala new file mode 100644 index 0000000000000..24f9e6ed6a769 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala @@ -0,0 +1,1253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.KeyValue +import org.apache.kafka.streams.kstream.{ + GlobalKTable, + JoinWindows, + Printed, + TransformerSupplier, + ValueTransformerSupplier, + ValueTransformerWithKeySupplier, + KStream => KStreamJ +} +import org.apache.kafka.streams.processor.TopicNameExtractor +import org.apache.kafka.streams.processor.api.{FixedKeyProcessorSupplier, ProcessorSupplier} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + FlatValueMapperFromFunction, + FlatValueMapperWithKeyFromFunction, + ForeachActionFromFunction, + KeyValueMapperFromFunction, + MapperFromFunction, + PredicateFromFunction, + TransformerSupplierAsJava, + ValueMapperFromFunction, + ValueMapperWithKeyFromFunction, + ValueTransformerSupplierAsJava, + ValueTransformerSupplierWithKeyAsJava +} + +import scala.jdk.CollectionConverters._ + +/** + * Wraps the Java class [[org.apache.kafka.streams.kstream.KStream KStream]] and delegates method calls to the + * underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for KStream + * @see `org.apache.kafka.streams.kstream.KStream` + */ +//noinspection ScalaDeprecation +class KStream[K, V](val inner: KStreamJ[K, V]) { + + /** + * Create a new [[KStream]] that consists of all records of this stream that satisfy the given predicate. + * + * @param predicate a filter that is applied to each record + * @return a [[KStream]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KStream#filter` + */ + def filter(predicate: (K, V) => Boolean): KStream[K, V] = + new KStream(inner.filter(predicate.asPredicate)) + + /** + * Create a new [[KStream]] that consists of all records of this stream that satisfy the given predicate. + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KStream#filter` + */ + def filter(predicate: (K, V) => Boolean, named: Named): KStream[K, V] = + new KStream(inner.filter(predicate.asPredicate, named)) + + /** + * Create a new [[KStream]] that consists of all records of this stream that do not satisfy the given + * predicate. + * + * @param predicate a filter that is applied to each record + * @return a [[KStream]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KStream#filterNot` + */ + def filterNot(predicate: (K, V) => Boolean): KStream[K, V] = + new KStream(inner.filterNot(predicate.asPredicate)) + + /** + * Create a new [[KStream]] that consists of all records of this stream that do not satisfy the given + * predicate. + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KStream#filterNot` + */ + def filterNot(predicate: (K, V) => Boolean, named: Named): KStream[K, V] = + new KStream(inner.filterNot(predicate.asPredicate, named)) + + /** + * Set a new key (with possibly new type) for each input record. + *

+ * The function `mapper` passed is applied to every record and results in the generation of a new + * key `KR`. The function outputs a new [[KStream]] where each record has this new key. + * + * @param mapper a function `(K, V) => KR` that computes a new key for each record + * @return a [[KStream]] that contains records with new key (possibly of different type) and unmodified value + * @see `org.apache.kafka.streams.kstream.KStream#selectKey` + */ + def selectKey[KR](mapper: (K, V) => KR): KStream[KR, V] = + new KStream(inner.selectKey[KR](mapper.asKeyValueMapper)) + + /** + * Set a new key (with possibly new type) for each input record. + *

+ * The function `mapper` passed is applied to every record and results in the generation of a new + * key `KR`. The function outputs a new [[KStream]] where each record has this new key. + * + * @param mapper a function `(K, V) => KR` that computes a new key for each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains records with new key (possibly of different type) and unmodified value + * @see `org.apache.kafka.streams.kstream.KStream#selectKey` + */ + def selectKey[KR](mapper: (K, V) => KR, named: Named): KStream[KR, V] = + new KStream(inner.selectKey[KR](mapper.asKeyValueMapper, named)) + + /** + * Transform each record of the input stream into a new record in the output stream (both key and value type can be + * altered arbitrarily). + *
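+ * For illustration only, a minimal sketch (the event stream and its field names are hypothetical):
+ * {{{
+ *   case class Event(userId: String, payload: String)
+ *
+ *   val events: KStream[String, Event] = //..
+ *   val byUser: KStream[String, Int] =
+ *     events.map((_, event) => (event.userId, event.payload.length))
+ * }}}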

+ * The provided `mapper`, a function `(K, V) => (KR, VR)` is applied to each input record and computes a new output record. + * + * @param mapper a function `(K, V) => (KR, VR)` that computes a new output record + * @return a [[KStream]] that contains records with new key and value (possibly both of different type) + * @see `org.apache.kafka.streams.kstream.KStream#map` + */ + def map[KR, VR](mapper: (K, V) => (KR, VR)): KStream[KR, VR] = + new KStream(inner.map[KR, VR](mapper.asKeyValueMapper)) + + /** + * Transform each record of the input stream into a new record in the output stream (both key and value type can be + * altered arbitrarily). + *

+ * The provided `mapper`, a function `(K, V) => (KR, VR)` is applied to each input record and computes a new output record. + * + * @param mapper a function `(K, V) => (KR, VR)` that computes a new output record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains records with new key and value (possibly both of different type) + * @see `org.apache.kafka.streams.kstream.KStream#map` + */ + def map[KR, VR](mapper: (K, V) => (KR, VR), named: Named): KStream[KR, VR] = + new KStream(inner.map[KR, VR](mapper.asKeyValueMapper, named)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#mapValues` + */ + def mapValues[VR](mapper: V => VR): KStream[K, VR] = + new KStream(inner.mapValues[VR](mapper.asValueMapper)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#mapValues` + */ + def mapValues[VR](mapper: V => VR, named: Named): KStream[K, VR] = + new KStream(inner.mapValues[VR](mapper.asValueMapper, named)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#mapValues` + */ + def mapValues[VR](mapper: (K, V) => VR): KStream[K, VR] = + new KStream(inner.mapValues[VR](mapper.asValueMapperWithKey)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#mapValues` + */ + def mapValues[VR](mapper: (K, V) => VR, named: Named): KStream[K, VR] = + new KStream(inner.mapValues[VR](mapper.asValueMapperWithKey, named)) + + /** + * Transform each record of the input stream into zero or more records in the output stream (both key and value type + * can be altered arbitrarily). + *

+ * The provided `mapper`, function `(K, V) => Iterable[(KR, VR)]` is applied to each input record and computes zero or more output records. + * + * @param mapper function `(K, V) => Iterable[(KR, VR)]` that computes the new output records + * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#flatMap` + */ + def flatMap[KR, VR](mapper: (K, V) => Iterable[(KR, VR)]): KStream[KR, VR] = { + val kvMapper = mapper.tupled.andThen(_.map(ImplicitConversions.tuple2ToKeyValue).asJava) + new KStream(inner.flatMap[KR, VR](((k: K, v: V) => kvMapper(k, v)).asKeyValueMapper)) + } + + /** + * Transform each record of the input stream into zero or more records in the output stream (both key and value type + * can be altered arbitrarily). + *

+ * The provided `mapper`, function `(K, V) => Iterable[(KR, VR)]` is applied to each input record and computes zero or more output records. + * + * @param mapper function `(K, V) => Iterable[(KR, VR)]` that computes the new output records + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#flatMap` + */ + def flatMap[KR, VR](mapper: (K, V) => Iterable[(KR, VR)], named: Named): KStream[KR, VR] = { + val kvMapper = mapper.tupled.andThen(_.map(ImplicitConversions.tuple2ToKeyValue).asJava) + new KStream(inner.flatMap[KR, VR](((k: K, v: V) => kvMapper(k, v)).asKeyValueMapper, named)) + } + + /** + * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values + * with the same key in the new stream. + *
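+ * For illustration only, a word-split sketch (the sentences stream is hypothetical):
+ * {{{
+ *   val sentences: KStream[String, String] = //..
+ *   val words: KStream[String, String] =
+ *     sentences.flatMapValues(_.toLowerCase.split("\\W+").toList)
+ * }}}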

+ * Transform the value of each input record into zero or more records with the same (unmodified) key in the output + * stream (value type can be altered arbitrarily). + * The provided `mapper`, a function `V => Iterable[VR]` is applied to each input record and computes zero or more output values. + * + * @param mapper a function `V => Iterable[VR]` that computes the new output values + * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type + * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` + */ + def flatMapValues[VR](mapper: V => Iterable[VR]): KStream[K, VR] = + new KStream(inner.flatMapValues[VR](mapper.asValueMapper)) + + /** + * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values + * with the same key in the new stream. + *

+ * Transform the value of each input record into zero or more records with the same (unmodified) key in the output + * stream (value type can be altered arbitrarily). + * The provided `mapper`, a function `V => Iterable[VR]` is applied to each input record and computes zero or more output values. + * + * @param mapper a function `V => Iterable[VR]` that computes the new output values + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type + * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` + */ + def flatMapValues[VR](mapper: V => Iterable[VR], named: Named): KStream[K, VR] = + new KStream(inner.flatMapValues[VR](mapper.asValueMapper, named)) + + /** + * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values + * with the same key in the new stream. + *

+ * Transform the value of each input record into zero or more records with the same (unmodified) key in the output + * stream (value type can be altered arbitrarily). + * The provided `mapper`, a function `(K, V) => Iterable[VR]` is applied to each input record and computes zero or more output values. + * + * @param mapper a function `(K, V) => Iterable[VR]` that computes the new output values + * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type + * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` + */ + def flatMapValues[VR](mapper: (K, V) => Iterable[VR]): KStream[K, VR] = + new KStream(inner.flatMapValues[VR](mapper.asValueMapperWithKey)) + + /** + * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values + * with the same key in the new stream. + *

+ * Transform the value of each input record into zero or more records with the same (unmodified) key in the output + * stream (value type can be altered arbitrarily). + * The provided `mapper`, a function `(K, V) => Iterable[VR]` is applied to each input record and computes zero or more output values. + * + * @param mapper a function `(K, V) => Iterable[VR]` that computes the new output values + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type + * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` + */ + def flatMapValues[VR](mapper: (K, V) => Iterable[VR], named: Named): KStream[K, VR] = + new KStream(inner.flatMapValues[VR](mapper.asValueMapperWithKey, named)) + + /** + * Print the records of this KStream using the options provided by `Printed` + * + * @param printed options for printing + * @see `org.apache.kafka.streams.kstream.KStream#print` + */ + def print(printed: Printed[K, V]): Unit = inner.print(printed) + + /** + * Perform an action on each record of `KStream` + * + * @param action an action to perform on each record + * @see `org.apache.kafka.streams.kstream.KStream#foreach` + */ + def foreach(action: (K, V) => Unit): Unit = + inner.foreach(action.asForeachAction) + + /** + * Perform an action on each record of `KStream` + * + * @param action an action to perform on each record + * @param named a [[Named]] config used to name the processor in the topology + * @see `org.apache.kafka.streams.kstream.KStream#foreach` + */ + def foreach(action: (K, V) => Unit, named: Named): Unit = + inner.foreach(action.asForeachAction, named) + + /** + * Creates an array of `KStream` from this stream by branching the records in the original stream based on + * the supplied predicates. + * + * @param predicates the ordered list of functions that return a Boolean + * @return multiple distinct substreams of this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#branch` + * @deprecated since 2.8. Use `split` instead. + */ + //noinspection ScalaUnnecessaryParentheses + @deprecated("use `split()` instead", "2.8") + def branch(predicates: ((K, V) => Boolean)*): Array[KStream[K, V]] = + inner.branch(predicates.map(_.asPredicate): _*).map(kstream => new KStream(kstream)) + + /** + * Split this stream. [[BranchedKStream]] can be used for routing the records to different branches depending + * on evaluation against the supplied predicates. + * Stream branching is a stateless record-by-record operation. + * + * @return [[BranchedKStream]] that provides methods for routing the records to different branches. + * @see `org.apache.kafka.streams.kstream.KStream#split` + */ + def split(): BranchedKStream[K, V] = + new BranchedKStream(inner.split()) + + /** + * Split this stream. [[BranchedKStream]] can be used for routing the records to different branches depending + * on evaluation against the supplied predicates. + * Stream branching is a stateless record-by-record operation. + * + * @param named a [[Named]] config used to name the processor in the topology and also to set the name prefix + * for the resulting branches (see [[BranchedKStream]]) + * @return [[BranchedKStream]] that provides methods for routing the records to different branches. 
+ * @see `org.apache.kafka.streams.kstream.KStream#split` + */ + def split(named: Named): BranchedKStream[K, V] = + new BranchedKStream(inner.split(named)) + + /** + * Materialize this stream to a topic and create a new [[KStream]] from the topic using the `Produced` instance for + * configuration of the `Serde key serde`, `Serde value serde`, and `StreamPartitioner` + *

+ * The user can either supply the `Produced` instance as an implicit in scope or they can also provide implicit + * key and value serdes that will be converted to a `Produced` instance implicitly. + *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * //.. + * val clicksPerRegion: KStream[String, Long] = //.. + * + * // Implicit serdes in scope will generate an implicit Produced instance, which + * // will be passed automatically to the call of through below + * clicksPerRegion.through(topic) + * + * // Similarly you can create an implicit Produced and it will be passed implicitly + * // to the through call + * }}} + * + * @param topic the topic name + * @param produced the instance of Produced that gives the serdes and `StreamPartitioner` + * @return a [[KStream]] that contains the exact same (and potentially repartitioned) records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#through` + * @deprecated use `repartition()` instead + */ + @deprecated("use `repartition()` instead", "2.6.0") + def through(topic: String)(implicit produced: Produced[K, V]): KStream[K, V] = + new KStream(inner.through(topic, produced)) + + /** + * Materialize this stream to a topic and creates a new [[KStream]] from the topic using the `Repartitioned` instance + * for configuration of the `Serde key serde`, `Serde value serde`, `StreamPartitioner`, number of partitions, and + * topic name part. + *

+ * The created topic is considered as an internal topic and is meant to be used only by the current Kafka Streams instance. + * Similar to auto-repartitioning, the topic will be created with infinite retention time and data will be automatically purged by Kafka Streams. + * The topic will be named as "${applicationId}-<name>-repartition", where "applicationId" is user-specified in + * `StreamsConfig` via parameter `APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG`, + * "<name>" is either provided via `Repartitioned#as(String)` or an internally + * generated name, and "-repartition" is a fixed suffix. + *

+ * The user can either supply the `Repartitioned` instance as an implicit in scope or they can also provide implicit + * key and value serdes that will be converted to a `Repartitioned` instance implicitly. + *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * //.. + * val clicksPerRegion: KStream[String, Long] = //.. + * + * // Implicit serdes in scope will generate an implicit Produced instance, which + * // will be passed automatically to the call of through below + * clicksPerRegion.repartition + * + * // Similarly you can create an implicit Repartitioned and it will be passed implicitly + * // to the repartition call + * }}} + * + * @param repartitioned the `Repartitioned` instance used to specify `Serdes`, `StreamPartitioner` which determines + * how records are distributed among partitions of the topic, + * part of the topic name, and number of partitions for a repartition topic. + * @return a [[KStream]] that contains the exact same repartitioned records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#repartition` + */ + def repartition(implicit repartitioned: Repartitioned[K, V]): KStream[K, V] = + new KStream(inner.repartition(repartitioned)) + + /** + * Materialize this stream to a topic using the `Produced` instance for + * configuration of the `Serde key serde`, `Serde value serde`, and `StreamPartitioner` + *

+ * The user can either supply the `Produced` instance as an implicit in scope or they can also provide implicit + * key and value serdes that will be converted to a `Produced` instance implicitly. + *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * //.. + * val clicksPerRegion: KTable[String, Long] = //.. + * + * // Implicit serdes in scope will generate an implicit Produced instance, which + * // will be passed automatically to the call of through below + * clicksPerRegion.to(topic) + * + * // Similarly you can create an implicit Produced and it will be passed implicitly + * // to the through call + * }}} + * + * @param topic the topic name + * @param produced the instance of Produced that gives the serdes and `StreamPartitioner` + * @see `org.apache.kafka.streams.kstream.KStream#to` + */ + def to(topic: String)(implicit produced: Produced[K, V]): Unit = + inner.to(topic, produced) + + /** + * Dynamically materialize this stream to topics using the `Produced` instance for + * configuration of the `Serde key serde`, `Serde value serde`, and `StreamPartitioner`. + * The topic names for each record to send to is dynamically determined based on the given mapper. + *

+ * The user can either supply the `Produced` instance as an implicit in scope or they can also provide implicit + * key and value serdes that will be converted to a `Produced` instance implicitly. + *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * //.. + * val clicksPerRegion: KTable[String, Long] = //.. + * + * // Implicit serdes in scope will generate an implicit Produced instance, which + * // will be passed automatically to the call of `to` below + * clicksPerRegion.to(topicChooser) + * + * // Similarly you can create an implicit Produced and it will be passed implicitly + * // to the `to` call + * }}} + * + * @param extractor the extractor to determine the name of the Kafka topic to write to for each record + * @param produced the instance of Produced that gives the serdes and `StreamPartitioner` + * @see `org.apache.kafka.streams.kstream.KStream#to` + */ + def to(extractor: TopicNameExtractor[K, V])(implicit produced: Produced[K, V]): Unit = + inner.to(extractor, produced) + + /** + * Convert this stream to a [[KTable]]. + * + * @return a [[KTable]] that contains the same records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#toTable` + */ + def toTable: KTable[K, V] = + new KTable(inner.toTable) + + /** + * Convert this stream to a [[KTable]]. + * + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KTable]] that contains the same records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#toTable` + */ + def toTable(named: Named): KTable[K, V] = + new KTable(inner.toTable(named)) + + /** + * Convert this stream to a [[KTable]]. + * + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains the same records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#toTable` + */ + def toTable(materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = + new KTable(inner.toTable(materialized)) + + /** + * Convert this stream to a [[KTable]]. + * + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains the same records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#toTable` + */ + def toTable(named: Named, materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = + new KTable(inner.toTable(named, materialized)) + + /** + * Transform each record of the input stream into zero or more records in the output stream (both key and value type + * can be altered arbitrarily). + * A `Transformer` (provided by the given `TransformerSupplier`) is applied to each input record + * and computes zero or more output records. + * In order to use a state store, the store must be created and added via `addStateStore` before it can be connected + * to the `Transformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default.
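+ * For illustration only, a sketch of registering a store with the builder and referencing it by name;
+ * `builder`, `stream` and `mySupplier` (a `TransformerSupplier` you provide) as well as the store name
+ * are hypothetical. Note that `transform` is deprecated and `process` is the recommended replacement:
+ * {{{
+ *   import org.apache.kafka.streams.scala.serialization.Serdes
+ *   import org.apache.kafka.streams.state.Stores
+ *
+ *   builder.addStateStore(
+ *     Stores.keyValueStoreBuilder(
+ *       Stores.persistentKeyValueStore("dedup-store"),
+ *       Serdes.stringSerde,
+ *       Serdes.stringSerde
+ *     )
+ *   )
+ *
+ *   val deduplicated: KStream[String, String] = stream.transform(mySupplier, "dedup-store")
+ * }}}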
+ * + * @param transformerSupplier the `TransformerSuplier` that generates `Transformer` + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transform` + */ + @deprecated(since = "3.3", message = "Use process(ProcessorSupplier, String*) instead.") + def transform[K1, V1]( + transformerSupplier: TransformerSupplier[K, V, KeyValue[K1, V1]], + stateStoreNames: String* + ): KStream[K1, V1] = + new KStream(inner.transform(transformerSupplier, stateStoreNames: _*)) + + /** + * Transform each record of the input stream into zero or more records in the output stream (both key and value type + * can be altered arbitrarily). + * A `Transformer` (provided by the given `TransformerSupplier`) is applied to each input record + * and computes zero or more output records. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `Transformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * @param transformerSupplier the `TransformerSuplier` that generates `Transformer` + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transform` + */ + @deprecated(since = "3.3", message = "Use process(ProcessorSupplier, Named, String*) instead.") + def transform[K1, V1]( + transformerSupplier: TransformerSupplier[K, V, KeyValue[K1, V1]], + named: Named, + stateStoreNames: String* + ): KStream[K1, V1] = + new KStream(inner.transform(transformerSupplier, named, stateStoreNames: _*)) + + /** + * Transform each record of the input stream into zero or more records in the output stream (both key and value type + * can be altered arbitrarily). + * A `Transformer` (provided by the given `TransformerSupplier`) is applied to each input record + * and computes zero or more output records. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `Transformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * @param transformerSupplier the `TransformerSuplier` that generates `Transformer` + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transform` + */ + @deprecated(since = "3.3", message = "Use process(ProcessorSupplier, String*) instead.") + def flatTransform[K1, V1]( + transformerSupplier: TransformerSupplier[K, V, Iterable[KeyValue[K1, V1]]], + stateStoreNames: String* + ): KStream[K1, V1] = + new KStream(inner.flatTransform(transformerSupplier.asJava, stateStoreNames: _*)) + + /** + * Transform each record of the input stream into zero or more records in the output stream (both key and value type + * can be altered arbitrarily). 
+ * A `Transformer` (provided by the given `TransformerSupplier`) is applied to each input record + * and computes zero or more output records. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `Transformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * @param transformerSupplier the `TransformerSuplier` that generates `Transformer` + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transform` + */ + @deprecated(since = "3.3", message = "Use process(ProcessorSupplier, Named, String*) instead.") + def flatTransform[K1, V1]( + transformerSupplier: TransformerSupplier[K, V, Iterable[KeyValue[K1, V1]]], + named: Named, + stateStoreNames: String* + ): KStream[K1, V1] = + new KStream(inner.flatTransform(transformerSupplier.asJava, named, stateStoreNames: _*)) + + /** + * Transform the value of each input record into zero or more records (with possible new type) in the + * output stream. + * A `ValueTransformer` (provided by the given `ValueTransformerSupplier`) is applied to each input + * record value and computes a new value for it. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `ValueTransformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * @param valueTransformerSupplier a instance of `ValueTransformerSupplier` that generates a `ValueTransformer` + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, Named, String*) instead.") + def flatTransformValues[VR]( + valueTransformerSupplier: ValueTransformerSupplier[V, Iterable[VR]], + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.flatTransformValues[VR](valueTransformerSupplier.asJava, stateStoreNames: _*)) + + /** + * Transform the value of each input record into zero or more records (with possible new type) in the + * output stream. + * A `ValueTransformer` (provided by the given `ValueTransformerSupplier`) is applied to each input + * record value and computes a new value for it. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `ValueTransformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. 
+ * + * @param valueTransformerSupplier a instance of `ValueTransformerSupplier` that generates a `ValueTransformer` + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, Named, String*) instead.") + def flatTransformValues[VR]( + valueTransformerSupplier: ValueTransformerSupplier[V, Iterable[VR]], + named: Named, + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.flatTransformValues[VR](valueTransformerSupplier.asJava, named, stateStoreNames: _*)) + + /** + * Transform the value of each input record into zero or more records (with possible new type) in the + * output stream. + * A `ValueTransformer` (provided by the given `ValueTransformerSupplier`) is applied to each input + * record value and computes a new value for it. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `ValueTransformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * @param valueTransformerSupplier a instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey` + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, String*) instead.") + def flatTransformValues[VR]( + valueTransformerSupplier: ValueTransformerWithKeySupplier[K, V, Iterable[VR]], + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.flatTransformValues[VR](valueTransformerSupplier.asJava, stateStoreNames: _*)) + + /** + * Transform the value of each input record into zero or more records (with possible new type) in the + * output stream. + * A `ValueTransformer` (provided by the given `ValueTransformerSupplier`) is applied to each input + * record value and computes a new value for it. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `ValueTransformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. 
+ * + * @param valueTransformerSupplier a instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey` + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, Named, String*) instead.") + def flatTransformValues[VR]( + valueTransformerSupplier: ValueTransformerWithKeySupplier[K, V, Iterable[VR]], + named: Named, + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.flatTransformValues[VR](valueTransformerSupplier.asJava, named, stateStoreNames: _*)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + * A `ValueTransformer` (provided by the given `ValueTransformerSupplier`) is applied to each input + * record value and computes a new value for it. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `ValueTransformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * @param valueTransformerSupplier a instance of `ValueTransformerSupplier` that generates a `ValueTransformer` + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, String*) instead.") + def transformValues[VR]( + valueTransformerSupplier: ValueTransformerSupplier[V, VR], + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.transformValues[VR](valueTransformerSupplier, stateStoreNames: _*)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + * A `ValueTransformer` (provided by the given `ValueTransformerSupplier`) is applied to each input + * record value and computes a new value for it. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `ValueTransformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. 
+ * + * @param valueTransformerSupplier a instance of `ValueTransformerSupplier` that generates a `ValueTransformer` + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, Named, String*) instead.") + def transformValues[VR]( + valueTransformerSupplier: ValueTransformerSupplier[V, VR], + named: Named, + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.transformValues[VR](valueTransformerSupplier, named, stateStoreNames: _*)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + * A `ValueTransformer` (provided by the given `ValueTransformerSupplier`) is applied to each input + * record value and computes a new value for it. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `ValueTransformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * @param valueTransformerSupplier a instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey` + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, String*) instead.") + def transformValues[VR]( + valueTransformerSupplier: ValueTransformerWithKeySupplier[K, V, VR], + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.transformValues[VR](valueTransformerSupplier, stateStoreNames: _*)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + * A `ValueTransformer` (provided by the given `ValueTransformerSupplier`) is applied to each input + * record value and computes a new value for it. + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `ValueTransformer`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. 
+ * + * @param valueTransformerSupplier a instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey` + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, Named, String*) instead.") + def transformValues[VR]( + valueTransformerSupplier: ValueTransformerWithKeySupplier[K, V, VR], + named: Named, + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.transformValues[VR](valueTransformerSupplier, named, stateStoreNames: _*)) + + /** + * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given + * `processorSupplier`). + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `Processor`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * @param processorSupplier a function that generates a [[org.apache.kafka.streams.processor.Processor]] + * @param stateStoreNames the names of the state store used by the processor + * @see `org.apache.kafka.streams.kstream.KStream#process` + */ + @deprecated(since = "3.0", message = "Use process(ProcessorSupplier, String*) instead.") + def process( + processorSupplier: () => org.apache.kafka.streams.processor.Processor[K, V], + stateStoreNames: String* + ): Unit = { + val processorSupplierJ: org.apache.kafka.streams.processor.ProcessorSupplier[K, V] = () => processorSupplier() + inner.process(processorSupplierJ, stateStoreNames: _*) + } + + /** + * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given + * `processorSupplier`). + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `Processor`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * @param processorSupplier a function that generates a [[org.apache.kafka.streams.processor.Processor]] + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state store used by the processor + * @see `org.apache.kafka.streams.kstream.KStream#process` + */ + @deprecated(since = "3.0", message = "Use process(ProcessorSupplier, String*) instead.") + def process( + processorSupplier: () => org.apache.kafka.streams.processor.Processor[K, V], + named: Named, + stateStoreNames: String* + ): Unit = { + val processorSupplierJ: org.apache.kafka.streams.processor.ProcessorSupplier[K, V] = () => processorSupplier() + inner.process(processorSupplierJ, named, stateStoreNames: _*) + } + + /** + * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given + * `processorSupplier`). + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `Processor`. 
+ * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with + * the older (deprecated) overload. + * + * @param processorSupplier a supplier for [[org.apache.kafka.streams.processor.api.Processor]] + * @param stateStoreNames the names of the state store used by the processor + * @see `org.apache.kafka.streams.kstream.KStream#process` + */ + def process[KR, VR](processorSupplier: ProcessorSupplier[K, V, KR, VR], stateStoreNames: String*): KStream[KR, VR] = + new KStream(inner.process(processorSupplier, stateStoreNames: _*)) + + /** + * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given + * `processorSupplier`). + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `Processor`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with + * the older (deprecated) overload. + * + * @param processorSupplier a supplier for [[org.apache.kafka.streams.processor.api.Processor]] + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state store used by the processor + * @see `org.apache.kafka.streams.kstream.KStream#process` + */ + def process[KR, VR]( + processorSupplier: ProcessorSupplier[K, V, KR, VR], + named: Named, + stateStoreNames: String* + ): KStream[KR, VR] = + new KStream(inner.process(processorSupplier, named, stateStoreNames: _*)) + + /** + * Process all records in this stream, one record at a time, by applying a `FixedKeyProcessor` (provided by the given + * `processorSupplier`). + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `FixedKeyProcessor`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * Note that this overload takes a FixedKeyProcessorSupplier instead of a Function to avoid post-erasure ambiguity with + * the older (deprecated) overload. + * + * @param processorSupplier a supplier for [[org.apache.kafka.streams.processor.api.FixedKeyProcessor]] + * @param stateStoreNames the names of the state store used by the processor + * @see `org.apache.kafka.streams.kstream.KStream#process` + */ + def processValues[VR]( + processorSupplier: FixedKeyProcessorSupplier[K, V, VR], + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.processValues(processorSupplier, stateStoreNames: _*)) + + /** + * Process all records in this stream, one record at a time, by applying a `FixedKeyProcessor` (provided by the given + * `processorSupplier`). + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `FixedKeyProcessor`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. 
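+ * For illustration, a minimal sketch of a `FixedKeyProcessorSupplier` (the stream name and the upper-casing
+ * logic below are hypothetical):
+ * {{{
+ * import org.apache.kafka.streams.processor.api.{FixedKeyProcessor, FixedKeyProcessorContext, FixedKeyProcessorSupplier, FixedKeyRecord}
+ *
+ * val textLines: KStream[String, String] = //..
+ *
+ * val shouted: KStream[String, String] = textLines.processValues(
+ *   new FixedKeyProcessorSupplier[String, String, String] {
+ *     override def get(): FixedKeyProcessor[String, String, String] =
+ *       new FixedKeyProcessor[String, String, String] {
+ *         private var ctx: FixedKeyProcessorContext[String, String] = _
+ *         override def init(context: FixedKeyProcessorContext[String, String]): Unit = ctx = context
+ *         // forward a value-only update; the key cannot be changed by a FixedKeyProcessor
+ *         override def process(record: FixedKeyRecord[String, String]): Unit =
+ *           ctx.forward(record.withValue(record.value.toUpperCase))
+ *       }
+ *   }
+ * )
+ * }}}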
+ * + * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with + * the older (deprecated) overload. + * + * @param processorSupplier a supplier for [[org.apache.kafka.streams.processor.api.FixedKeyProcessor]] + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state store used by the processor + * @see `org.apache.kafka.streams.kstream.KStream#process` + */ + def processValues[VR]( + processorSupplier: FixedKeyProcessorSupplier[K, V, VR], + named: Named, + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.processValues(processorSupplier, named, stateStoreNames: _*)) + + /** + * Group the records by their current key into a [[KGroupedStream]] + *

+ * The user can either supply the `Grouped` instance as an implicit in scope or provide implicit
+ * serdes that will be converted to a `Grouped` instance implicitly.
+ *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * val clicksPerRegion: KTable[String, Long] = + * userClicksStream + * .leftJoin(userRegionsTable, (clicks: Long, region: String) => (if (region == null) "UNKNOWN" else region, clicks)) + * .map((_, regionWithClicks) => regionWithClicks) + * + * // the groupByKey gets the Grouped instance through an implicit conversion of the + * // serdes brought into scope through the import Serdes._ above + * .groupByKey + * .reduce(_ + _) + * + * // Similarly you can create an implicit Grouped and it will be passed implicitly + * // to the groupByKey call + * }}} + * + * @param grouped the instance of Grouped that gives the serdes + * @return a [[KGroupedStream]] that contains the grouped records of the original [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#groupByKey` + */ + def groupByKey(implicit grouped: Grouped[K, V]): KGroupedStream[K, V] = + new KGroupedStream(inner.groupByKey(grouped)) + + /** + * Group the records of this [[KStream]] on a new key that is selected using the provided key transformation function + * and the `Grouped` instance. + *

+ * The user can either supply the `Grouped` instance as an implicit in scope or provide implicit
+ * serdes that will be converted to a `Grouped` instance implicitly.
+ *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * val textLines = streamBuilder.stream[String, String](inputTopic) + * + * val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) + * + * val wordCounts: KTable[String, Long] = + * textLines.flatMapValues(v => pattern.split(v.toLowerCase)) + * + * // the groupBy gets the Grouped instance through an implicit conversion of the + * // serdes brought into scope through the import Serdes._ above + * .groupBy((k, v) => v) + * + * .count() + * }}} + * + * @param selector a function that computes a new key for grouping + * @return a [[KGroupedStream]] that contains the grouped records of the original [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#groupBy` + */ + def groupBy[KR](selector: (K, V) => KR)(implicit grouped: Grouped[KR, V]): KGroupedStream[KR, V] = + new KGroupedStream(inner.groupBy(selector.asKeyValueMapper, grouped)) + + /** + * Join records of this stream with another [[KStream]]'s records using windowed inner equi join with + * serializers and deserializers supplied by the implicit `StreamJoined` instance. + * + * @param otherStream the [[KStream]] to be joined with this stream + * @param joiner a function that computes the join result for a pair of matching records + * @param windows the specification of the `JoinWindows` + * @param streamJoin an implicit `StreamJoin` instance that defines the serdes to be used to serialize/deserialize + * inputs and outputs of the joined streams. Instead of `StreamJoin`, the user can also supply + * key serde, value serde and other value serde in implicit scope and they will be + * converted to the instance of `Stream` through implicit conversion. The `StreamJoin` instance can + * also name the repartition topic (if required), the state stores for the join, and the join + * processor node. + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one for each matched record-pair with the same key and within the joining window intervals + * @see `org.apache.kafka.streams.kstream.KStream#join` + */ + def join[VO, VR](otherStream: KStream[K, VO])( + joiner: (V, VO) => VR, + windows: JoinWindows + )(implicit streamJoin: StreamJoined[K, V, VO]): KStream[K, VR] = + new KStream(inner.join[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, streamJoin)) + + /** + * Join records of this stream with another [[KStream]]'s records using windowed left equi join with + * serializers and deserializers supplied by the implicit `StreamJoined` instance. + * + * @param otherStream the [[KStream]] to be joined with this stream + * @param joiner a function that computes the join result for a pair of matching records + * @param windows the specification of the `JoinWindows` + * @param streamJoin an implicit `StreamJoin` instance that defines the serdes to be used to serialize/deserialize + * inputs and outputs of the joined streams. Instead of `StreamJoin`, the user can also supply + * key serde, value serde and other value serde in implicit scope and they will be + * converted to the instance of `Stream` through implicit conversion. The `StreamJoin` instance can + * also name the repartition topic (if required), the state stores for the join, and the join + * processor node. 
+ * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one for each matched record-pair with the same key and within the joining window intervals + * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` + */ + def leftJoin[VO, VR](otherStream: KStream[K, VO])( + joiner: (V, VO) => VR, + windows: JoinWindows + )(implicit streamJoin: StreamJoined[K, V, VO]): KStream[K, VR] = + new KStream(inner.leftJoin[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, streamJoin)) + + /** + * Join records of this stream with another [[KStream]]'s records using windowed outer equi join with + * serializers and deserializers supplied by the implicit `Joined` instance. + * + * @param otherStream the [[KStream]] to be joined with this stream + * @param joiner a function that computes the join result for a pair of matching records + * @param windows the specification of the `JoinWindows` + * @param streamJoin an implicit `StreamJoin` instance that defines the serdes to be used to serialize/deserialize + * inputs and outputs of the joined streams. Instead of `StreamJoin`, the user can also supply + * key serde, value serde and other value serde in implicit scope and they will be + * converted to the instance of `Stream` through implicit conversion. The `StreamJoin` instance can + * also name the repartition topic (if required), the state stores for the join, and the join + * processor node. + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one for each matched record-pair with the same key and within the joining window intervals + * @see `org.apache.kafka.streams.kstream.KStream#outerJoin` + */ + def outerJoin[VO, VR](otherStream: KStream[K, VO])( + joiner: (V, VO) => VR, + windows: JoinWindows + )(implicit streamJoin: StreamJoined[K, V, VO]): KStream[K, VR] = + new KStream(inner.outerJoin[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, streamJoin)) + + /** + * Join records of this stream with another [[KTable]]'s records using inner equi join with + * serializers and deserializers supplied by the implicit `Joined` instance. + * + * @param table the [[KTable]] to be joined with this stream + * @param joiner a function that computes the join result for a pair of matching records + * @param joined an implicit `Joined` instance that defines the serdes to be used to serialize/deserialize + * inputs and outputs of the joined streams. Instead of `Joined`, the user can also supply + * key serde, value serde and other value serde in implicit scope and they will be + * converted to the instance of `Joined` through implicit conversion + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KStream#join` + */ + def join[VT, VR](table: KTable[K, VT])(joiner: (V, VT) => VR)(implicit joined: Joined[K, V, VT]): KStream[K, VR] = + new KStream(inner.join[VT, VR](table.inner, joiner.asValueJoiner, joined)) + + /** + * Join records of this stream with another [[KTable]]'s records using left equi join with + * serializers and deserializers supplied by the implicit `Joined` instance. 
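+ * For example, a minimal sketch (the stream and table names are hypothetical; assumes the implicit serdes from
+ * `Serdes._` and the conversions from `ImplicitConversions._` are in scope so that the `Joined` can be derived):
+ * {{{
+ * import Serdes._
+ *
+ * val userClicks: KStream[String, Long] = //..
+ * val userRegions: KTable[String, String] = //..
+ *
+ * // region is null when the table has no entry for the key, hence the default
+ * val clicksWithRegion: KStream[String, (String, Long)] =
+ *   userClicks.leftJoin(userRegions)((clicks, region) => (Option(region).getOrElse("UNKNOWN"), clicks))
+ * }}}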
+ * + * @param table the [[KTable]] to be joined with this stream + * @param joiner a function that computes the join result for a pair of matching records + * @param joined an implicit `Joined` instance that defines the serdes to be used to serialize/deserialize + * inputs and outputs of the joined streams. Instead of `Joined`, the user can also supply + * key serde, value serde and other value serde in implicit scope and they will be + * converted to the instance of `Joined` through implicit conversion + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` + */ + def leftJoin[VT, VR](table: KTable[K, VT])(joiner: (V, VT) => VR)(implicit joined: Joined[K, V, VT]): KStream[K, VR] = + new KStream(inner.leftJoin[VT, VR](table.inner, joiner.asValueJoiner, joined)) + + /** + * Join records of this stream with `GlobalKTable`'s records using non-windowed inner equi join. + * + * @param globalKTable the `GlobalKTable` to be joined with this stream + * @param keyValueMapper a function used to map from the (key, value) of this stream + * to the key of the `GlobalKTable` + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one output for each input [[KStream]] record + * @see `org.apache.kafka.streams.kstream.KStream#join` + */ + def join[GK, GV, RV](globalKTable: GlobalKTable[GK, GV])( + keyValueMapper: (K, V) => GK, + joiner: (V, GV) => RV + ): KStream[K, RV] = + new KStream( + inner.join[GK, GV, RV]( + globalKTable, + ((k: K, v: V) => keyValueMapper(k, v)).asKeyValueMapper, + ((v: V, gv: GV) => joiner(v, gv)).asValueJoiner + ) + ) + + /** + * Join records of this stream with `GlobalKTable`'s records using non-windowed inner equi join. + * + * @param globalKTable the `GlobalKTable` to be joined with this stream + * @param named a [[Named]] config used to name the processor in the topology + * @param keyValueMapper a function used to map from the (key, value) of this stream + * to the key of the `GlobalKTable` + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one output for each input [[KStream]] record + * @see `org.apache.kafka.streams.kstream.KStream#join` + */ + def join[GK, GV, RV](globalKTable: GlobalKTable[GK, GV], named: Named)( + keyValueMapper: (K, V) => GK, + joiner: (V, GV) => RV + ): KStream[K, RV] = + new KStream( + inner.join[GK, GV, RV]( + globalKTable, + ((k: K, v: V) => keyValueMapper(k, v)).asKeyValueMapper, + ((v: V, gv: GV) => joiner(v, gv)).asValueJoiner, + named + ) + ) + + /** + * Join records of this stream with `GlobalKTable`'s records using non-windowed left equi join. 
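+ * For example, a minimal sketch (the `orders` stream, `products` global table, and the `Order`/`Product`
+ * types are hypothetical):
+ * {{{
+ * val orders: KStream[String, Order] = //..
+ * val products: GlobalKTable[String, Product] = //..
+ *
+ * val enriched: KStream[String, (Order, Product)] =
+ *   orders.leftJoin(products)(
+ *     (_, order) => order.productId,       // map each stream record to the GlobalKTable key
+ *     (order, product) => (order, product) // pair the stream value with the (possibly null) table value
+ *   )
+ * }}}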
+ * + * @param globalKTable the `GlobalKTable` to be joined with this stream + * @param keyValueMapper a function used to map from the (key, value) of this stream + * to the key of the `GlobalKTable` + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one output for each input [[KStream]] record + * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` + */ + def leftJoin[GK, GV, RV](globalKTable: GlobalKTable[GK, GV])( + keyValueMapper: (K, V) => GK, + joiner: (V, GV) => RV + ): KStream[K, RV] = + new KStream(inner.leftJoin[GK, GV, RV](globalKTable, keyValueMapper.asKeyValueMapper, joiner.asValueJoiner)) + + /** + * Join records of this stream with `GlobalKTable`'s records using non-windowed left equi join. + * + * @param globalKTable the `GlobalKTable` to be joined with this stream + * @param named a [[Named]] config used to name the processor in the topology + * @param keyValueMapper a function used to map from the (key, value) of this stream + * to the key of the `GlobalKTable` + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one output for each input [[KStream]] record + * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` + */ + def leftJoin[GK, GV, RV](globalKTable: GlobalKTable[GK, GV], named: Named)( + keyValueMapper: (K, V) => GK, + joiner: (V, GV) => RV + ): KStream[K, RV] = + new KStream(inner.leftJoin[GK, GV, RV](globalKTable, keyValueMapper.asKeyValueMapper, joiner.asValueJoiner, named)) + + /** + * Merge this stream and the given stream into one larger stream. + *
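+ * For example (the stream names are hypothetical; both inputs must share the same key and value types):
+ * {{{
+ * val clicksEurope: KStream[String, Long] = //..
+ * val clicksAsia: KStream[String, Long] = //..
+ *
+ * // contains every record from both input streams
+ * val allClicks: KStream[String, Long] = clicksEurope.merge(clicksAsia)
+ * }}}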

+ * There is no ordering guarantee between records from this `KStream` and records from the provided `KStream` + * in the merged stream. Relative order is preserved within each input stream though (ie, records within + * one input stream are processed in order). + * + * @param stream a stream which is to be merged into this stream + * @return a merged stream containing all records from this and the provided [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#merge` + */ + def merge(stream: KStream[K, V]): KStream[K, V] = + new KStream(inner.merge(stream.inner)) + + /** + * Merge this stream and the given stream into one larger stream. + *

+ * There is no ordering guarantee between records from this `KStream` and records from the provided `KStream` + * in the merged stream. Relative order is preserved within each input stream though (ie, records within + * one input stream are processed in order). + * + * @param named a [[Named]] config used to name the processor in the topology + * @param stream a stream which is to be merged into this stream + * @return a merged stream containing all records from this and the provided [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#merge` + */ + def merge(stream: KStream[K, V], named: Named): KStream[K, V] = + new KStream(inner.merge(stream.inner, named)) + + /** + * Perform an action on each record of `KStream`. + *
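+ * For example, a minimal logging side effect (the stream name is hypothetical):
+ * {{{
+ * val observed: KStream[String, Long] = clicks.peek((key, value) => println(s"key=$key value=$value"))
+ * }}}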

+ * Peek is a non-terminal operation that triggers a side effect (such as logging or statistics collection) + * and returns an unchanged stream. + * + * @param action an action to perform on each record + * @see `org.apache.kafka.streams.kstream.KStream#peek` + */ + def peek(action: (K, V) => Unit): KStream[K, V] = + new KStream(inner.peek(action.asForeachAction)) + + /** + * Perform an action on each record of `KStream`. + *

+ * Peek is a non-terminal operation that triggers a side effect (such as logging or statistics collection) + * and returns an unchanged stream. + * + * @param action an action to perform on each record + * @param named a [[Named]] config used to name the processor in the topology + * @see `org.apache.kafka.streams.kstream.KStream#peek` + */ + def peek(action: (K, V) => Unit, named: Named): KStream[K, V] = + new KStream(inner.peek(action.asForeachAction, named)) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala new file mode 100644 index 0000000000000..9d8fe81f71a8e --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala @@ -0,0 +1,763 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.common.utils.Bytes +import org.apache.kafka.streams.kstream.{TableJoined, ValueJoiner, ValueTransformerWithKeySupplier, KTable => KTableJ} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + FunctionFromFunction, + KeyValueMapperFromFunction, + MapperFromFunction, + PredicateFromFunction, + ValueMapperFromFunction, + ValueMapperWithKeyFromFunction +} +import org.apache.kafka.streams.state.KeyValueStore + +/** + * Wraps the Java class [[org.apache.kafka.streams.kstream.KTable]] and delegates method calls to the underlying Java object. 
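+ * A `KTable` is typically obtained from a `StreamsBuilder` or by converting a [[KStream]]; for example
+ * (the topic name is hypothetical; implicit serdes from `Serdes._`, and the `Consumed` derived from them,
+ * are assumed to be in scope):
+ * {{{
+ * import Serdes._
+ *
+ * val userRegions: KTable[String, String] = builder.table[String, String]("user-regions")
+ * val nonEmptyRegions: KTable[String, String] = userRegions.filter((_, region) => region.nonEmpty)
+ * }}}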
+ * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for KTable + * @see `org.apache.kafka.streams.kstream.KTable` + */ +class KTable[K, V](val inner: KTableJ[K, V]) { + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given + * predicate + * + * @param predicate a filter that is applied to each record + * @return a [[KTable]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filter` + */ + def filter(predicate: (K, V) => Boolean): KTable[K, V] = + new KTable(inner.filter(predicate.asPredicate)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KTable]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filter` + */ + def filter(predicate: (K, V) => Boolean, named: Named): KTable[K, V] = + new KTable(inner.filter(predicate.asPredicate, named)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filter` + */ + def filter(predicate: (K, V) => Boolean, materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = + new KTable(inner.filter(predicate.asPredicate, materialized)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. 
+ * @return a [[KTable]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filter` + */ + def filter( + predicate: (K, V) => Boolean, + named: Named, + materialized: Materialized[K, V, ByteArrayKeyValueStore] + ): KTable[K, V] = + new KTable(inner.filter(predicate.asPredicate, named, materialized)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given + * predicate + * + * @param predicate a filter that is applied to each record + * @return a [[KTable]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filterNot` + */ + def filterNot(predicate: (K, V) => Boolean): KTable[K, V] = + new KTable(inner.filterNot(predicate.asPredicate)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KTable]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filterNot` + */ + def filterNot(predicate: (K, V) => Boolean, named: Named): KTable[K, V] = + new KTable(inner.filterNot(predicate.asPredicate, named)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filterNot` + */ + def filterNot(predicate: (K, V) => Boolean, materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = + new KTable(inner.filterNot(predicate.asPredicate, materialized)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filterNot` + */ + def filterNot( + predicate: (K, V) => Boolean, + named: Named, + materialized: Materialized[K, V, ByteArrayKeyValueStore] + ): KTable[K, V] = + new KTable(inner.filterNot(predicate.asPredicate, named, materialized)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *
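+ * For example (the `userRegions` table name is hypothetical):
+ * {{{
+ * val userRegions: KTable[String, String] = //..
+ *
+ * val regionNameLengths: KTable[String, Int] = userRegions.mapValues(region => region.length)
+ * }}}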

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: V => VR): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapper)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: V => VR, named: Named): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapper, named)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: V => VR, materialized: Materialized[K, VR, ByteArrayKeyValueStore]): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapper, materialized)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR]( + mapper: V => VR, + named: Named, + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapper, named, materialized)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: (K, V) => VR): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: (K, V) => VR, named: Named): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey, named)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: (K, V) => VR, materialized: Materialized[K, VR, ByteArrayKeyValueStore]): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey, materialized)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR]( + mapper: (K, V) => VR, + named: Named, + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey, named, materialized)) + + /** + * Convert this changelog stream to a [[KStream]]. + * + * @return a [[KStream]] that contains the same records as this [[KTable]] + * @see `org.apache.kafka.streams.kstream.KTable#toStream` + */ + def toStream: KStream[K, V] = + new KStream(inner.toStream) + + /** + * Convert this changelog stream to a [[KStream]]. + * + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains the same records as this [[KTable]] + * @see `org.apache.kafka.streams.kstream.KTable#toStream` + */ + def toStream(named: Named): KStream[K, V] = + new KStream(inner.toStream(named)) + + /** + * Convert this changelog stream to a [[KStream]] using the given key/value mapper to select the new key + * + * @param mapper a function that computes a new key for each record + * @return a [[KStream]] that contains the same records as this [[KTable]] + * @see `org.apache.kafka.streams.kstream.KTable#toStream` + */ + def toStream[KR](mapper: (K, V) => KR): KStream[KR, V] = + new KStream(inner.toStream[KR](mapper.asKeyValueMapper)) + + /** + * Convert this changelog stream to a [[KStream]] using the given key/value mapper to select the new key + * + * @param mapper a function that computes a new key for each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains the same records as this [[KTable]] + * @see `org.apache.kafka.streams.kstream.KTable#toStream` + */ + def toStream[KR](mapper: (K, V) => KR, named: Named): KStream[KR, V] = + new KStream(inner.toStream[KR](mapper.asKeyValueMapper, named)) + + /** + * Suppress some updates from this changelog stream, determined by the supplied [[org.apache.kafka.streams.kstream.Suppressed]] configuration. + * + * This controls what updates downstream table and stream operations will receive. + * + * @param suppressed Configuration object determining what, if any, updates to suppress. + * @return A new KTable with the desired suppression characteristics. + * @see `org.apache.kafka.streams.kstream.KTable#suppress` + */ + def suppress(suppressed: org.apache.kafka.streams.kstream.Suppressed[_ >: K]): KTable[K, V] = + new KTable(inner.suppress(suppressed)) + + /** + * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value, (with possibly new type). + * Transform the value of each input record into a new value (with possible new type) of the output record. + * A `ValueTransformerWithKey` (provided by the given `ValueTransformerWithKeySupplier`) is applied to each input + * record value and computes a new value for it. 
+ * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing access to additional state-stores,
+ * and to the `ProcessorContext`.
+ * If the downstream topology uses aggregation functions (e.g. `KGroupedTable#reduce`, `KGroupedTable#aggregate`, etc.),
+ * care must be taken when dealing with state (either held in state-stores or transformer instances) to ensure correct
+ * aggregate results.
+ * In contrast, if the resulting KTable is materialized (cf. `#transformValues(ValueTransformerWithKeySupplier, Materialized, String...)`),
+ * such concerns are handled for you.
+ * In order to assign a state store, the state store must be created and registered
+ * beforehand via `addStateStore` or `addGlobalStore` before it can be connected to the `Transformer`.
+ *
+ * @param valueTransformerWithKeySupplier an instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey`.
+ * At least one transformer instance will be created per streaming task.
+ * Transformer implementations do not need to be thread-safe.
+ * @param stateStoreNames the names of the state stores used by the processor
+ * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type)
+ * @see `org.apache.kafka.streams.kstream.KTable#transformValues`
+ */
+ def transformValues[VR](
+ valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR],
+ stateStoreNames: String*
+ ): KTable[K, VR] =
+ new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, stateStoreNames: _*))
+
+ /**
+ * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value (with possibly a new type).
+ * Transform the value of each input record into a new value (with possibly a new type) of the output record.
+ * A `ValueTransformerWithKey` (provided by the given `ValueTransformerWithKeySupplier`) is applied to each input
+ * record value and computes a new value for it.
+ * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing access to additional state-stores,
+ * and to the `ProcessorContext`.
+ * If the downstream topology uses aggregation functions (e.g. `KGroupedTable#reduce`, `KGroupedTable#aggregate`, etc.),
+ * care must be taken when dealing with state (either held in state-stores or transformer instances) to ensure correct
+ * aggregate results.
+ * In contrast, if the resulting KTable is materialized (cf. `#transformValues(ValueTransformerWithKeySupplier, Materialized, String...)`),
+ * such concerns are handled for you.
+ * In order to assign a state store, the state store must be created and registered
+ * beforehand via `addStateStore` or `addGlobalStore` before it can be connected to the `Transformer`.
+ *
+ * @param valueTransformerWithKeySupplier an instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey`.
+ * At least one transformer instance will be created per streaming task.
+ * Transformer implementations do not need to be thread-safe.
+ * @param named a [[Named]] config used to name the processor in the topology
+ * @param stateStoreNames the names of the state stores used by the processor
+ * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type)
+ * @see `org.apache.kafka.streams.kstream.KTable#transformValues`
+ */
+ def transformValues[VR](
+ valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR],
+ named: Named,
+ stateStoreNames: String*
+ ): KTable[K, VR] =
+ new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, named, stateStoreNames: _*))
+
+ /**
+ * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value (with possibly a new type).
+ * A `ValueTransformerWithKey` (provided by the given `ValueTransformerWithKeySupplier`) is applied to each input
+ * record value and computes a new value for it.
+ * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing stateful, rather than stateless,
+ * record-by-record operation, access to additional state-stores, and access to the `ProcessorContext`.
+ * In order to assign a state store, the state store must be created and registered
+ * beforehand via `addStateStore` or `addGlobalStore` before it can be connected to the `Transformer`.
+ * The resulting `KTable` is materialized into another state store (additional to the provided state store names)
+ * as specified by the user via the `Materialized` parameter, and is queryable through its given name.
+ *
+ * @param valueTransformerWithKeySupplier an instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey`
+ * At least one transformer instance will be created per streaming task.
+ * Transformer implementations do not need to be thread-safe.
+ * @param materialized an instance of `Materialized` used to describe how the state store of the
+ * resulting table should be materialized.
+ * @param stateStoreNames the names of the state stores used by the processor
+ * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type)
+ * @see `org.apache.kafka.streams.kstream.KTable#transformValues`
+ */
+ def transformValues[VR](
+ valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR],
+ materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]],
+ stateStoreNames: String*
+ ): KTable[K, VR] =
+ new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, materialized, stateStoreNames: _*))
+
+ /**
+ * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value (with possibly a new type).
+ * A `ValueTransformerWithKey` (provided by the given `ValueTransformerWithKeySupplier`) is applied to each input
+ * record value and computes a new value for it.
+ * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing stateful, rather than stateless,
+ * record-by-record operation, access to additional state-stores, and access to the `ProcessorContext`.
+ * In order to assign a state store, the state store must be created and registered
+ * beforehand via `addStateStore` or `addGlobalStore` before it can be connected to the `Transformer`.
+ * The resulting `KTable` is materialized into another state store (additional to the provided state store names)
+ * as specified by the user via the `Materialized` parameter, and is queryable through its given name.
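+ * For illustration only, a minimal sketch of how such a call might look (the transformer class, store names,
+ * and the implicit serdes in scope are assumptions, not part of this API's contract):
+ * {{{
+ *   val enriched: KTable[String, String] =
+ *     table.transformValues(
+ *       new ValueTransformerWithKeySupplier[String, String, String] {
+ *         override def get(): ValueTransformerWithKey[String, String, String] = new MyEnrichingTransformer // hypothetical
+ *       },
+ *       Materialized.as[String, String, ByteArrayKeyValueStore]("enriched-table"),
+ *       Named.as("enrich"),
+ *       "lookup-store"
+ *     )
+ * }}}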
+ *
+ * @param valueTransformerWithKeySupplier an instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey`
+ * At least one transformer instance will be created per streaming task.
+ * Transformer implementations do not need to be thread-safe.
+ * @param materialized an instance of `Materialized` used to describe how the state store of the
+ * resulting table should be materialized.
+ * @param named a [[Named]] config used to name the processor in the topology
+ * @param stateStoreNames the names of the state stores used by the processor
+ * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type)
+ * @see `org.apache.kafka.streams.kstream.KTable#transformValues`
+ */
+ def transformValues[VR](
+ valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR],
+ materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]],
+ named: Named,
+ stateStoreNames: String*
+ ): KTable[K, VR] =
+ new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, materialized, named, stateStoreNames: _*))
+
+ /**
+ * Re-groups the records of this [[KTable]] using the provided key/value mapper
+ * and `Serde`s as specified by `Grouped`.
+ *
+ * @param selector a function that computes a new grouping key and value to be aggregated
+ * @param grouped the `Grouped` instance used to specify `Serdes`
+ * @return a [[KGroupedTable]] that contains the re-grouped records of the original [[KTable]]
+ * @see `org.apache.kafka.streams.kstream.KTable#groupBy`
+ */
+ def groupBy[KR, VR](selector: (K, V) => (KR, VR))(implicit grouped: Grouped[KR, VR]): KGroupedTable[KR, VR] =
+ new KGroupedTable(inner.groupBy(selector.asKeyValueMapper, grouped))
+
+ /**
+ * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join.
+ *
+ * @param other the other [[KTable]] to be joined with this [[KTable]]
+ * @param joiner a function that computes the join result for a pair of matching records
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+ * one for each matched record-pair with the same key
+ * @see `org.apache.kafka.streams.kstream.KTable#join`
+ */
+ def join[VO, VR](other: KTable[K, VO])(joiner: (V, VO) => VR): KTable[K, VR] =
+ new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner))
+
+ /**
+ * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join.
+ *
+ * @param other the other [[KTable]] to be joined with this [[KTable]]
+ * @param named a [[Named]] config used to name the processor in the topology
+ * @param joiner a function that computes the join result for a pair of matching records
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+ * one for each matched record-pair with the same key
+ * @see `org.apache.kafka.streams.kstream.KTable#join`
+ */
+ def join[VO, VR](other: KTable[K, VO], named: Named)(joiner: (V, VO) => VR): KTable[K, VR] =
+ new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner, named))
+
+ /**
+ * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join.
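+ * For illustration only, a minimal sketch (assumes implicit serdes, e.g. from `Serdes._`, are in scope;
+ * all names are hypothetical):
+ * {{{
+ *   val totals: KTable[String, Long] = ???
+ *   val labels: KTable[String, String] = ???
+ *   val joined: KTable[String, String] =
+ *     totals.join(labels, Materialized.as[String, String, ByteArrayKeyValueStore]("joined-store")) {
+ *       (total, label) => s"$label=$total"
+ *     }
+ * }}}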
+ * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#join` + */ + def join[VO, VR](other: KTable[K, VO], materialized: Materialized[K, VR, ByteArrayKeyValueStore])( + joiner: (V, VO) => VR + ): KTable[K, VR] = + new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#join` + */ + def join[VO, VR](other: KTable[K, VO], named: Named, materialized: Materialized[K, VR, ByteArrayKeyValueStore])( + joiner: (V, VO) => VR + ): KTable[K, VR] = + new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner, named, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` + */ + def leftJoin[VO, VR](other: KTable[K, VO])(joiner: (V, VO) => VR): KTable[K, VR] = + new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param named a [[Named]] config used to name the processor in the topology + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` + */ + def leftJoin[VO, VR](other: KTable[K, VO], named: Named)(joiner: (V, VO) => VR): KTable[K, VR] = + new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, named)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. 
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+ * one for each matched record-pair with the same key
+ * @see `org.apache.kafka.streams.kstream.KTable#leftJoin`
+ */
+ def leftJoin[VO, VR](other: KTable[K, VO], materialized: Materialized[K, VR, ByteArrayKeyValueStore])(
+ joiner: (V, VO) => VR
+ ): KTable[K, VR] =
+ new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized))
+
+ /**
+ * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join.
+ *
+ * @param other the other [[KTable]] to be joined with this [[KTable]]
+ * @param named a [[Named]] config used to name the processor in the topology
+ * @param joiner a function that computes the join result for a pair of matching records
+ * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]]
+ * should be materialized.
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+ * one for each matched record-pair with the same key
+ * @see `org.apache.kafka.streams.kstream.KTable#leftJoin`
+ */
+ def leftJoin[VO, VR](other: KTable[K, VO], named: Named, materialized: Materialized[K, VR, ByteArrayKeyValueStore])(
+ joiner: (V, VO) => VR
+ ): KTable[K, VR] =
+ new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, named, materialized))
+
+ /**
+ * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join.
+ *
+ * @param other the other [[KTable]] to be joined with this [[KTable]]
+ * @param joiner a function that computes the join result for a pair of matching records
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+ * one for each matched record-pair with the same key
+ * @see `org.apache.kafka.streams.kstream.KTable#outerJoin`
+ */
+ def outerJoin[VO, VR](other: KTable[K, VO])(joiner: (V, VO) => VR): KTable[K, VR] =
+ new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner))
+
+ /**
+ * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join.
+ *
+ * @param other the other [[KTable]] to be joined with this [[KTable]]
+ * @param named a [[Named]] config used to name the processor in the topology
+ * @param joiner a function that computes the join result for a pair of matching records
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+ * one for each matched record-pair with the same key
+ * @see `org.apache.kafka.streams.kstream.KTable#outerJoin`
+ */
+ def outerJoin[VO, VR](other: KTable[K, VO], named: Named)(joiner: (V, VO) => VR): KTable[K, VR] =
+ new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, named))
+
+ /**
+ * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join.
+ *
+ * @param other the other [[KTable]] to be joined with this [[KTable]]
+ * @param joiner a function that computes the join result for a pair of matching records
+ * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]]
+ * should be materialized.
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+ * one for each matched record-pair with the same key
+ * @see `org.apache.kafka.streams.kstream.KTable#outerJoin`
+ */
+ def outerJoin[VO, VR](other: KTable[K, VO], materialized: Materialized[K, VR, ByteArrayKeyValueStore])(
+ joiner: (V, VO) => VR
+ ): KTable[K, VR] =
+ new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized))
+
+ /**
+ * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join.
+ *
+ * @param other the other [[KTable]] to be joined with this [[KTable]]
+ * @param named a [[Named]] config used to name the processor in the topology
+ * @param joiner a function that computes the join result for a pair of matching records
+ * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]]
+ * should be materialized.
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+ * one for each matched record-pair with the same key
+ * @see `org.apache.kafka.streams.kstream.KTable#outerJoin`
+ */
+ def outerJoin[VO, VR](other: KTable[K, VO], named: Named, materialized: Materialized[K, VR, ByteArrayKeyValueStore])(
+ joiner: (V, VO) => VR
+ ): KTable[K, VR] =
+ new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, named, materialized))
+
+ /**
+ * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this
+ * table are joined according to the result of keyExtractor on the other KTable.
+ *
+ * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor
+ * @param keyExtractor a function that extracts the foreign key from this table's value
+ * @param joiner a function that computes the join result for a pair of matching records
+ * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]]
+ * should be materialized.
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+ * one for each matched record-pair with the same key
+ */
+ def join[VR, KO, VO](
+ other: KTable[KO, VO],
+ keyExtractor: Function[V, KO],
+ joiner: ValueJoiner[V, VO, VR],
+ materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]
+ ): KTable[K, VR] =
+ new KTable(inner.join(other.inner, keyExtractor.asJavaFunction, joiner, materialized))
+
+ /**
+ * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this
+ * table are joined according to the result of keyExtractor on the other KTable.
+ *
+ * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor
+ * @param keyExtractor a function that extracts the foreign key from this table's value
+ * @param joiner a function that computes the join result for a pair of matching records
+ * @param named a [[Named]] config used to name the processor in the topology
+ * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]]
+ * should be materialized.
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + @deprecated("Use join(KTable, Function, ValueJoiner, TableJoined, Materialized) instead", since = "3.1") + def join[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: Function[V, KO], + joiner: ValueJoiner[V, VO, VR], + named: Named, + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.join(other.inner, keyExtractor.asJavaFunction, joiner, named, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. + * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's value + * @param joiner a function that computes the join result for a pair of matching records + * @param tableJoined a [[TableJoined]] used to configure partitioners and names of internal topics and stores + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def join[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: Function[V, KO], + joiner: ValueJoiner[V, VO, VR], + tableJoined: TableJoined[K, KO], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.join(other.inner, keyExtractor.asJavaFunction, joiner, tableJoined, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. + * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's value + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def leftJoin[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: Function[V, KO], + joiner: ValueJoiner[V, VO, VR], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.leftJoin(other.inner, keyExtractor.asJavaFunction, joiner, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. 
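+ * For illustration only, a minimal sketch of a foreign-key left join using the `Materialized` variant above
+ * (assumes implicit serdes are in scope; all names are hypothetical):
+ * {{{
+ *   val orders: KTable[String, String] = ???     // value holds the customer id
+ *   val customers: KTable[String, String] = ???  // keyed by customer id
+ *   val enriched: KTable[String, String] =
+ *     orders.leftJoin(
+ *       customers,
+ *       (customerId: String) => customerId,
+ *       (order: String, customer: String) => s"$order/$customer",
+ *       Materialized.as[String, String, ByteArrayKeyValueStore]("orders-with-customers")
+ *     )
+ * }}}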
+ * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's value + * @param joiner a function that computes the join result for a pair of matching records + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + @deprecated("Use leftJoin(KTable, Function, ValueJoiner, TableJoined, Materialized) instead", since = "3.1") + def leftJoin[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: Function[V, KO], + joiner: ValueJoiner[V, VO, VR], + named: Named, + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.leftJoin(other.inner, keyExtractor.asJavaFunction, joiner, named, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. + * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's value + * @param joiner a function that computes the join result for a pair of matching records + * @param tableJoined a [[TableJoined]] used to configure partitioners and names of internal topics and stores + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def leftJoin[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: Function[V, KO], + joiner: ValueJoiner[V, VO, VR], + tableJoined: TableJoined[K, KO], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.leftJoin(other.inner, keyExtractor.asJavaFunction, joiner, tableJoined, materialized)) + + /** + * Get the name of the local state store used that can be used to query this [[KTable]]. + * + * @return the underlying state store name, or `null` if this [[KTable]] cannot be queried. + */ + def queryableStoreName: String = + inner.queryableStoreName +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala new file mode 100644 index 0000000000000..421ac5afeb3ad --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Materialized => MaterializedJ} +import org.apache.kafka.streams.processor.StateStore +import org.apache.kafka.streams.scala.{ByteArrayKeyValueStore, ByteArraySessionStore, ByteArrayWindowStore} +import org.apache.kafka.streams.state.{KeyValueBytesStoreSupplier, SessionBytesStoreSupplier, WindowBytesStoreSupplier} + +object Materialized { + + /** + * Materialize a [[StateStore]] with the provided key and value [[Serde]]s. + * An internal name will be used for the store. + * + * @tparam K key type + * @tparam V value type + * @tparam S store type + * @param keySerde the key [[Serde]] to use. + * @param valueSerde the value [[Serde]] to use. + * @return a new [[Materialized]] instance with the given key and value serdes + */ + def `with`[K, V, S <: StateStore](implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, S] = + MaterializedJ.`with`(keySerde, valueSerde) + + /** + * Materialize a [[StateStore]] with the given name. + * + * @tparam K key type of the store + * @tparam V value type of the store + * @tparam S type of the [[StateStore]] + * @param storeName the name of the underlying [[org.apache.kafka.streams.scala.kstream.KTable]] state store; + * valid characters are ASCII alphanumerics, '.', '_' and '-'. + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @return a new [[Materialized]] instance with the given storeName + */ + def as[K, V, S <: StateStore]( + storeName: String + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, S] = + MaterializedJ.as(storeName).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Materialize a [[org.apache.kafka.streams.state.WindowStore]] using the provided [[WindowBytesStoreSupplier]]. + * + * Important: Custom subclasses are allowed here, but they should respect the retention contract: + * Window stores are required to retain windows at least as long as (window size + window grace period). + * Stores constructed via [[org.apache.kafka.streams.state.Stores]] already satisfy this contract. + * + * @tparam K key type of the store + * @tparam V value type of the store + * @param supplier the [[WindowBytesStoreSupplier]] used to materialize the store + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @return a new [[Materialized]] instance with the given supplier + */ + def as[K, V]( + supplier: WindowBytesStoreSupplier + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, ByteArrayWindowStore] = + MaterializedJ.as(supplier).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Materialize a [[org.apache.kafka.streams.state.SessionStore]] using the provided [[SessionBytesStoreSupplier]]. + * + * Important: Custom subclasses are allowed here, but they should respect the retention contract: + * Session stores are required to retain windows at least as long as (session inactivity gap + session grace period). 
+ * Stores constructed via [[org.apache.kafka.streams.state.Stores]] already satisfy this contract. + * + * @tparam K key type of the store + * @tparam V value type of the store + * @param supplier the [[SessionBytesStoreSupplier]] used to materialize the store + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @return a new [[Materialized]] instance with the given supplier + */ + def as[K, V]( + supplier: SessionBytesStoreSupplier + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, ByteArraySessionStore] = + MaterializedJ.as(supplier).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Materialize a [[org.apache.kafka.streams.state.KeyValueStore]] using the provided [[KeyValueBytesStoreSupplier]]. + * + * @tparam K key type of the store + * @tparam V value type of the store + * @param supplier the [[KeyValueBytesStoreSupplier]] used to materialize the store + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @return a new [[Materialized]] instance with the given supplier + */ + def as[K, V]( + supplier: KeyValueBytesStoreSupplier + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, ByteArrayKeyValueStore] = + MaterializedJ.as(supplier).withKeySerde(keySerde).withValueSerde(valueSerde) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala new file mode 100644 index 0000000000000..48f917875867b --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Produced => ProducedJ} +import org.apache.kafka.streams.processor.StreamPartitioner + +object Produced { + + /** + * Create a Produced instance with provided keySerde and valueSerde. + * + * @tparam K key type + * @tparam V value type + * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Produced]] instance configured with keySerde and valueSerde + * @see KStream#through(String, Produced) + * @see KStream#to(String, Produced) + */ + def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): ProducedJ[K, V] = + ProducedJ.`with`(keySerde, valueSerde) + + /** + * Create a Produced instance with provided keySerde, valueSerde, and partitioner. 
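+ * For illustration only, a minimal sketch (assumes a `KStream[String, Long]` named `stream`, implicit serdes in
+ * scope, and a hypothetical `StreamPartitioner[String, Long]` named `myPartitioner`; the topic name is made up):
+ * {{{
+ *   stream.to("output-topic")(Produced.`with`[String, Long](myPartitioner))
+ * }}}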
+ * + * @tparam K key type + * @tparam V value type + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified and `keySerde` provides a + * [[org.apache.kafka.streams.kstream.internals.WindowedSerializer]] for the key + * [[org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner]] will be + * used—otherwise [[org.apache.kafka.clients.producer.internals.DefaultPartitioner]] + * will be used + * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Produced]] instance configured with keySerde, valueSerde, and partitioner + * @see KStream#through(String, Produced) + * @see KStream#to(String, Produced) + */ + def `with`[K, V]( + partitioner: StreamPartitioner[K, V] + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ProducedJ[K, V] = + ProducedJ.`with`(keySerde, valueSerde, partitioner) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala new file mode 100644 index 0000000000000..5f33efa78aa4f --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Repartitioned => RepartitionedJ} +import org.apache.kafka.streams.processor.StreamPartitioner + +object Repartitioned { + + /** + * Create a Repartitioned instance with provided keySerde and valueSerde. + * + * @tparam K key type + * @tparam V value type + * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Repartitioned]] instance configured with keySerde and valueSerde + * @see KStream#repartition(Repartitioned) + */ + def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = + RepartitionedJ.`with`(keySerde, valueSerde) + + /** + * Create a Repartitioned instance with provided keySerde, valueSerde, and name used as part of the repartition topic. + * + * @tparam K key type + * @tparam V value type + * @param name the name used as a processor named and part of the repartition topic name. 
+ * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Repartitioned]] instance configured with keySerde, valueSerde, and processor and repartition topic name + * @see KStream#repartition(Repartitioned) + */ + def `with`[K, V](name: String)(implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = + RepartitionedJ.`as`(name).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Create a Repartitioned instance with provided keySerde, valueSerde, and partitioner. + * + * @tparam K key type + * @tparam V value type + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified and `keySerde` provides a + * [[org.apache.kafka.streams.kstream.internals.WindowedSerializer]] for the key + * [[org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner]] will be + * used—otherwise [[org.apache.kafka.clients.producer.internals.DefaultPartitioner]] + * will be used + * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Repartitioned]] instance configured with keySerde, valueSerde, and partitioner + * @see KStream#repartition(Repartitioned) + */ + def `with`[K, V]( + partitioner: StreamPartitioner[K, V] + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = + RepartitionedJ.`streamPartitioner`(partitioner).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Create a Repartitioned instance with provided keySerde, valueSerde, and number of partitions for repartition topic. + * + * @tparam K key type + * @tparam V value type + * @param numberOfPartitions number of partitions used when creating repartition topic + * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Repartitioned]] instance configured with keySerde, valueSerde, and number of partitions + * @see KStream#repartition(Repartitioned) + */ + def `with`[K, V](numberOfPartitions: Int)(implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = + RepartitionedJ.`numberOfPartitions`(numberOfPartitions).withKeySerde(keySerde).withValueSerde(valueSerde) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala new file mode 100644 index 0000000000000..1b20179d5d38d --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.{SessionWindowedCogroupedKStream => SessionWindowedCogroupedKStreamJ, Windowed} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{InitializerFromFunction, MergerFromFunction} + +/** + * Wraps the Java class SessionWindowedCogroupedKStream and delegates method calls to the underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for SessionWindowedCogroupedKStream + * @see `org.apache.kafka.streams.kstream.SessionWindowedCogroupedKStream` + */ +class SessionWindowedCogroupedKStream[K, V](val inner: SessionWindowedCogroupedKStreamJ[K, V]) { + + /** + * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. + * + * @param initializer the initializer function + * @param merger a function that combines two aggregation results. + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedCogroupedKStream#aggregate` + */ + def aggregate(initializer: => V, merger: (K, V, V) => V)(implicit + materialized: Materialized[K, V, ByteArraySessionStore] + ): KTable[Windowed[K], V] = + new KTable(inner.aggregate((() => initializer).asInitializer, merger.asMerger, materialized)) + + /** + * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. + * + * @param initializer the initializer function + * @param merger a function that combines two aggregation results. + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedCogroupedKStream#aggregate` + */ + def aggregate(initializer: => V, merger: (K, V, V) => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArraySessionStore] + ): KTable[Windowed[K], V] = + new KTable(inner.aggregate((() => initializer).asInitializer, merger.asMerger, named, materialized)) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala new file mode 100644 index 0000000000000..3d6e157ecdced --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.internals.KTableImpl +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.kstream.{KTable => KTableJ, SessionWindowedKStream => SessionWindowedKStreamJ, Windowed} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + AggregatorFromFunction, + InitializerFromFunction, + MergerFromFunction, + ReducerFromFunction, + ValueMapperFromFunction +} + +/** + * Wraps the Java class SessionWindowedKStream and delegates method calls to the underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for SessionWindowedKStream + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream` + */ +class SessionWindowedKStream[K, V](val inner: SessionWindowedKStreamJ[K, V]) { + + /** + * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. + * + * @param initializer the initializer function + * @param aggregator the aggregator function + * @param merger the merger function + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#aggregate` + */ + def aggregate[VR](initializer: => VR)(aggregator: (K, V, VR) => VR, merger: (K, VR, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArraySessionStore] + ): KTable[Windowed[K], VR] = + new KTable( + inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, merger.asMerger, materialized) + ) + + /** + * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. + * + * @param initializer the initializer function + * @param aggregator the aggregator function + * @param merger the merger function + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#aggregate` + */ + def aggregate[VR](initializer: => VR, named: Named)(aggregator: (K, V, VR) => VR, merger: (K, VR, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArraySessionStore] + ): KTable[Windowed[K], VR] = + new KTable( + inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, merger.asMerger, named, materialized) + ) + + /** + * Count the number of records in this stream by the grouped key into `SessionWindows`. + * + * @param materialized an instance of `Materialized` used to materialize a state store. 
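+ * For illustration only, a minimal sketch (assumes implicit serdes and a derived `Materialized` are in scope,
+ * and a `KGroupedStream[String, Long]` named `grouped`; names are hypothetical):
+ * {{{
+ *   val counts: KTable[Windowed[String], Long] =
+ *     grouped
+ *       .windowedBy(SessionWindows.ofInactivityGapWithNoGrace(java.time.Duration.ofMinutes(30)))
+ *       .count()
+ * }}}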
+ * @return a windowed [[KTable]] that contains "update" records with unmodified keys and `Long` values + * that represent the latest (rolling) count (i.e., number of records) for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#count` + */ + def count()(implicit materialized: Materialized[K, Long, ByteArraySessionStore]): KTable[Windowed[K], Long] = { + val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = + inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArraySessionStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArraySessionStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Count the number of records in this stream by the grouped key into `SessionWindows`. + * + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys and `Long` values + * that represent the latest (rolling) count (i.e., number of records) for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#count` + */ + def count( + named: Named + )(implicit materialized: Materialized[K, Long, ByteArraySessionStore]): KTable[Windowed[K], Long] = { + val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = + inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArraySessionStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArraySessionStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Combine values of this stream by the grouped key into `SessionWindows`. + * + * @param reducer a reducer function that computes a new aggregate result. + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#reduce` + */ + def reduce(reducer: (V, V) => V)(implicit + materialized: Materialized[K, V, ByteArraySessionStore] + ): KTable[Windowed[K], V] = + new KTable(inner.reduce(reducer.asReducer, materialized)) + + /** + * Combine values of this stream by the grouped key into `SessionWindows`. + * + * @param reducer a reducer function that computes a new aggregate result. + * @param materialized an instance of `Materialized` used to materialize a state store. 
+ * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#reduce` + */ + def reduce(reducer: (V, V) => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArraySessionStore] + ): KTable[Windowed[K], V] = + new KTable(inner.reduce(reducer.asReducer, named, materialized)) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala new file mode 100644 index 0000000000000..9caad638e4cd7 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{StreamJoined => StreamJoinedJ} +import org.apache.kafka.streams.state.WindowBytesStoreSupplier + +object StreamJoined { + + /** + * Create an instance of [[StreamJoined]] with key, value, and otherValue [[Serde]] + * instances. + * `null` values are accepted and will be replaced by the default serdes as defined in config. + * + * @tparam K key type + * @tparam V value type + * @tparam VO other value type + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used + * @return new [[StreamJoined]] instance with the provided serdes + */ + def `with`[K, V, VO](implicit + keySerde: Serde[K], + valueSerde: Serde[V], + otherValueSerde: Serde[VO] + ): StreamJoinedJ[K, V, VO] = + StreamJoinedJ.`with`(keySerde, valueSerde, otherValueSerde) + + /** + * Create an instance of [[StreamJoined]] with store suppliers for the calling stream + * and the other stream. Also adds the key, value, and otherValue [[Serde]] + * instances. + * `null` values are accepted and will be replaced by the default serdes as defined in config. + * + * @tparam K key type + * @tparam V value type + * @tparam VO other value type + * @param supplier store supplier to use + * @param otherSupplier other store supplier to use + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @param otherValueSerde the otherValue serde to use. 
If `null` the default value serde from config will be used + * @return new [[StreamJoined]] instance with the provided store suppliers and serdes + */ + def `with`[K, V, VO]( + supplier: WindowBytesStoreSupplier, + otherSupplier: WindowBytesStoreSupplier + )(implicit keySerde: Serde[K], valueSerde: Serde[V], otherValueSerde: Serde[VO]): StreamJoinedJ[K, V, VO] = + StreamJoinedJ + .`with`(supplier, otherSupplier) + .withKeySerde(keySerde) + .withValueSerde(valueSerde) + .withOtherValueSerde(otherValueSerde) + + /** + * Create an instance of [[StreamJoined]] with the name used for naming + * the state stores involved in the join. Also adds the key, value, and otherValue [[Serde]] + * instances. + * `null` values are accepted and will be replaced by the default serdes as defined in config. + * + * @tparam K key type + * @tparam V value type + * @tparam VO other value type + * @param storeName the name to use as a base name for the state stores of the join + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used + * @return new [[StreamJoined]] instance with the provided store suppliers and serdes + */ + def as[K, V, VO]( + storeName: String + )(implicit keySerde: Serde[K], valueSerde: Serde[V], otherValueSerde: Serde[VO]): StreamJoinedJ[K, V, VO] = + StreamJoinedJ.as(storeName).withKeySerde(keySerde).withValueSerde(valueSerde).withOtherValueSerde(otherValueSerde) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala new file mode 100644 index 0000000000000..ad24228ecc686 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.{TimeWindowedCogroupedKStream => TimeWindowedCogroupedKStreamJ, Windowed} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.InitializerFromFunction + +/** + * Wraps the Java class TimeWindowedCogroupedKStream and delegates method calls to the underlying Java object. 
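+ * For illustration only, a minimal sketch of cogrouping two streams into five-minute windows (assumes implicit
+ * serdes and a derived `Materialized` are in scope; all names are hypothetical):
+ * {{{
+ *   val clicks: KGroupedStream[String, Long] = ???
+ *   val views: KGroupedStream[String, Long] = ???
+ *   val totals: KTable[Windowed[String], Long] =
+ *     clicks
+ *       .cogroup((_: String, click: Long, agg: Long) => agg + click)
+ *       .cogroup(views, (_: String, view: Long, agg: Long) => agg + view)
+ *       .windowedBy(TimeWindows.ofSizeWithNoGrace(java.time.Duration.ofMinutes(5)))
+ *       .aggregate(0L)
+ * }}}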
+ * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for TimeWindowedCogroupedKStream + * @see `org.apache.kafka.streams.kstream.TimeWindowedCogroupedKStream` + */ +class TimeWindowedCogroupedKStream[K, V](val inner: TimeWindowedCogroupedKStreamJ[K, V]) { + + /** + * Aggregate the values of records in these streams by the grouped key and defined window. + * + * @param initializer an initializer function that computes an initial intermediate aggregation result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest + * (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedCogroupedKStream#aggregate` + */ + def aggregate(initializer: => V)(implicit + materialized: Materialized[K, V, ByteArrayWindowStore] + ): KTable[Windowed[K], V] = + new KTable(inner.aggregate((() => initializer).asInitializer, materialized)) + + /** + * Aggregate the values of records in these streams by the grouped key and defined window. + * + * @param initializer an initializer function that computes an initial intermediate aggregation result + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest + * (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedCogroupedKStream#aggregate` + */ + def aggregate(initializer: => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArrayWindowStore] + ): KTable[Windowed[K], V] = + new KTable(inner.aggregate((() => initializer).asInitializer, named, materialized)) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala new file mode 100644 index 0000000000000..4fcf227e03723 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.internals.KTableImpl +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.kstream.{KTable => KTableJ, TimeWindowedKStream => TimeWindowedKStreamJ, Windowed} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + AggregatorFromFunction, + InitializerFromFunction, + ReducerFromFunction, + ValueMapperFromFunction +} + +/** + * Wraps the Java class TimeWindowedKStream and delegates method calls to the underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for TimeWindowedKStream + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream` + */ +class TimeWindowedKStream[K, V](val inner: TimeWindowedKStreamJ[K, V]) { + + /** + * Aggregate the values of records in this stream by the grouped key. + * + * @param initializer an initializer function that computes an initial intermediate aggregation result + * @param aggregator an aggregator function that computes a new aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#aggregate` + */ + def aggregate[VR](initializer: => VR)(aggregator: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayWindowStore] + ): KTable[Windowed[K], VR] = + new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, materialized)) + + /** + * Aggregate the values of records in this stream by the grouped key. + * + * @param initializer an initializer function that computes an initial intermediate aggregation result + * @param named a [[Named]] config used to name the processor in the topology + * @param aggregator an aggregator function that computes a new aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#aggregate` + */ + def aggregate[VR](initializer: => VR, named: Named)(aggregator: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayWindowStore] + ): KTable[Windowed[K], VR] = + new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, named, materialized)) + + /** + * Count the number of records in this stream by the grouped key and the defined windows. + * + * @param materialized an instance of `Materialized` used to materialize a state store. 
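+ * For illustration only, a minimal sketch (assumes implicit serdes and a derived `Materialized` are in scope,
+ * and a `KGroupedStream[String, Long]` named `grouped`):
+ * {{{
+ *   val counts: KTable[Windowed[String], Long] =
+ *     grouped
+ *       .windowedBy(TimeWindows.ofSizeAndGrace(java.time.Duration.ofMinutes(5), java.time.Duration.ofMinutes(1)))
+ *       .count()
+ * }}}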
+ * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#count` + */ + def count()(implicit materialized: Materialized[K, Long, ByteArrayWindowStore]): KTable[Windowed[K], Long] = { + val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = + inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayWindowStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArrayWindowStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Count the number of records in this stream by the grouped key and the defined windows. + * + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#count` + */ + def count( + named: Named + )(implicit materialized: Materialized[K, Long, ByteArrayWindowStore]): KTable[Windowed[K], Long] = { + val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = + inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayWindowStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArrayWindowStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Combine the values of records in this stream by the grouped key. + * + * @param reducer a function that computes a new aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#reduce` + */ + def reduce(reducer: (V, V) => V)(implicit + materialized: Materialized[K, V, ByteArrayWindowStore] + ): KTable[Windowed[K], V] = + new KTable(inner.reduce(reducer.asReducer, materialized)) + + /** + * Combine the values of records in this stream by the grouped key. + * + * @param reducer a function that computes a new aggregate result + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. 
+ * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#reduce` + */ + def reduce(reducer: (V, V) => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArrayWindowStore] + ): KTable[Windowed[K], V] = + new KTable(inner.reduce(reducer.asReducer, named, materialized)) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala new file mode 100644 index 0000000000000..7365c68ad1803 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import org.apache.kafka.streams.processor.StateStore + +package object kstream { + type Materialized[K, V, S <: StateStore] = org.apache.kafka.streams.kstream.Materialized[K, V, S] + type Grouped[K, V] = org.apache.kafka.streams.kstream.Grouped[K, V] + type Consumed[K, V] = org.apache.kafka.streams.kstream.Consumed[K, V] + type Produced[K, V] = org.apache.kafka.streams.kstream.Produced[K, V] + type Repartitioned[K, V] = org.apache.kafka.streams.kstream.Repartitioned[K, V] + type Joined[K, V, VO] = org.apache.kafka.streams.kstream.Joined[K, V, VO] + type StreamJoined[K, V, VO] = org.apache.kafka.streams.kstream.StreamJoined[K, V, VO] + type Named = org.apache.kafka.streams.kstream.Named + type Branched[K, V] = org.apache.kafka.streams.kstream.Branched[K, V] +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala new file mode 100644 index 0000000000000..6a3906dd58931 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
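As an illustrative aside (not part of the patch), this is roughly how the TimeWindowedKStream wrapper and the type aliases from the kstream package object above are used together; the topic name and window size are assumptions:

import java.time.Duration
import org.apache.kafka.streams.kstream.{TimeWindows, Windowed}
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.kstream._
import org.apache.kafka.streams.scala.serialization.Serdes._

object WindowedReduceSketch extends App {
  val builder = new StreamsBuilder
  // Consumed, Grouped and Materialized are the package-object aliases,
  // resolved implicitly from the serdes imported above.
  val purchases: KStream[String, Long] = builder.stream[String, Long]("purchases") // hypothetical topic

  // Per-key totals over 1-hour tumbling windows: KGroupedStream#windowedBy -> TimeWindowedKStream#reduce.
  val hourlyTotals: KTable[Windowed[String], Long] =
    purchases.groupByKey
      .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofHours(1)))
      .reduce(_ + _)

  println(builder.build().describe())
}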
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams + +import org.apache.kafka.streams.state.{KeyValueStore, SessionStore, WindowStore} +import org.apache.kafka.common.utils.Bytes + +package object scala { + type ByteArrayKeyValueStore = KeyValueStore[Bytes, Array[Byte]] + type ByteArraySessionStore = SessionStore[Bytes, Array[Byte]] + type ByteArrayWindowStore = WindowStore[Bytes, Array[Byte]] +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala new file mode 100644 index 0000000000000..0c72358c15f3e --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.serialization + +import java.nio.ByteBuffer +import java.util +import java.util.UUID + +import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer, Serdes => JSerdes} +import org.apache.kafka.streams.kstream.WindowedSerdes + +object Serdes extends LowPrioritySerdes { + implicit def stringSerde: Serde[String] = JSerdes.String() + implicit def longSerde: Serde[Long] = JSerdes.Long().asInstanceOf[Serde[Long]] + implicit def javaLongSerde: Serde[java.lang.Long] = JSerdes.Long() + implicit def byteArraySerde: Serde[Array[Byte]] = JSerdes.ByteArray() + implicit def bytesSerde: Serde[org.apache.kafka.common.utils.Bytes] = JSerdes.Bytes() + implicit def byteBufferSerde: Serde[ByteBuffer] = JSerdes.ByteBuffer() + implicit def shortSerde: Serde[Short] = JSerdes.Short().asInstanceOf[Serde[Short]] + implicit def javaShortSerde: Serde[java.lang.Short] = JSerdes.Short() + implicit def floatSerde: Serde[Float] = JSerdes.Float().asInstanceOf[Serde[Float]] + implicit def javaFloatSerde: Serde[java.lang.Float] = JSerdes.Float() + implicit def doubleSerde: Serde[Double] = JSerdes.Double().asInstanceOf[Serde[Double]] + implicit def javaDoubleSerde: Serde[java.lang.Double] = JSerdes.Double() + implicit def intSerde: Serde[Int] = JSerdes.Integer().asInstanceOf[Serde[Int]] + implicit def javaIntegerSerde: Serde[java.lang.Integer] = JSerdes.Integer() + implicit def uuidSerde: Serde[UUID] = JSerdes.UUID() + + implicit def sessionWindowedSerde[T](implicit tSerde: Serde[T]): WindowedSerdes.SessionWindowedSerde[T] = + new WindowedSerdes.SessionWindowedSerde[T](tSerde) + + def fromFn[T >: Null](serializer: T => Array[Byte], deserializer: Array[Byte] => Option[T]): Serde[T] = + JSerdes.serdeFrom( + new Serializer[T] { + override def serialize(topic: String, data: T): Array[Byte] = serializer(data) + override def 
configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + }, + new Deserializer[T] { + override def deserialize(topic: String, data: Array[Byte]): T = deserializer(data).orNull + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + } + ) + + def fromFn[T >: Null]( + serializer: (String, T) => Array[Byte], + deserializer: (String, Array[Byte]) => Option[T] + ): Serde[T] = + JSerdes.serdeFrom( + new Serializer[T] { + override def serialize(topic: String, data: T): Array[Byte] = serializer(topic, data) + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + }, + new Deserializer[T] { + override def deserialize(topic: String, data: Array[Byte]): T = deserializer(topic, data).orNull + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + } + ) +} + +trait LowPrioritySerdes { + + implicit val nullSerde: Serde[Null] = + Serdes.fromFn[Null]( + { _: Null => + null + }, + { _: Array[Byte] => + None + } + ) +} diff --git a/streams/streams-scala/bin/test/log4j.properties b/streams/streams-scala/bin/test/log4j.properties new file mode 100644 index 0000000000000..93ffc165654a2 --- /dev/null +++ b/streams/streams-scala/bin/test/log4j.properties @@ -0,0 +1,34 @@ +# Copyright (C) 2018 Lightbend Inc. +# Copyright (C) 2017-2018 Alexis Seigneurin. +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=INFO, R + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +log4j.appender.R=org.apache.log4j.RollingFileAppender +log4j.appender.R.File=logs/kafka-streams-scala.log + +log4j.appender.R.MaxFileSize=100KB +# Keep one backup file +log4j.appender.R.MaxBackupIndex=1 + +# A1 uses PatternLayout. +log4j.appender.R.layout=org.apache.log4j.PatternLayout +log4j.appender.R.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala new file mode 100644 index 0000000000000..e9577bcf73c6b --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
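A hedged sketch (not part of the patch) of how the `Serdes.fromFn` helper above can derive a custom serde from two plain functions; the `User` type and its string encoding are assumptions made for illustration only:

import java.nio.charset.StandardCharsets
import org.apache.kafka.common.serialization.Serde
import org.apache.kafka.streams.scala.serialization.Serdes

object UserSerdeSketch {
  // Hypothetical domain type, not part of the patch.
  final case class User(name: String, age: Int)

  // Serializer and deserializer are given as plain functions; fromFn wires them into a Serde.
  implicit val userSerde: Serde[User] = Serdes.fromFn[User](
    (user: User) => s"${user.name}:${user.age}".getBytes(StandardCharsets.UTF_8),
    (bytes: Array[Byte]) =>
      Option(bytes).map { raw =>
        val Array(name, age) = new String(raw, StandardCharsets.UTF_8).split(':')
        User(name, age.toInt)
      }
  )
}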
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import java.util.Properties + +import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} +import org.apache.kafka.streams.scala.serialization.{Serdes => NewSerdes} +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.kstream._ +import org.apache.kafka.streams.scala.utils.StreamToTableJoinScalaIntegrationTestBase +import org.junit.jupiter.api._ +import org.junit.jupiter.api.Assertions._ + +/** + * Test suite that does an example to demonstrate stream-table joins in Kafka Streams + *
+ * The suite contains the test case using Scala APIs `testShouldCountClicksPerRegion` and the same test case using the + * Java APIs `testShouldCountClicksPerRegionJava`. The idea is to demonstrate that both generate the same result. + */ +@Tag("integration") +class StreamToTableJoinScalaIntegrationTestImplicitSerdes extends StreamToTableJoinScalaIntegrationTestBase { + + @Test def testShouldCountClicksPerRegion(): Unit = { + + // DefaultSerdes brings into scope implicit serdes (mostly for primitives) that will set up all Grouped, Produced, + // Consumed and Joined instances. So all APIs below that accept Grouped, Produced, Consumed or Joined will + // get these instances automatically + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamsConfiguration: Properties = getStreamsConfiguration() + + val builder = new StreamsBuilder() + + val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) + + val userRegionsTable: KTable[String, String] = builder.table(userRegionsTopic) + + // Compute the total per region by summing the individual click counts per region. + val clicksPerRegion: KTable[String, Long] = + userClicksStream + + // Join the stream against the table. + .leftJoin(userRegionsTable)((clicks, region) => (if (region == null) "UNKNOWN" else region, clicks)) + + // Change the stream from <user> -> <region, clicks> to <region> -> <clicks> + .map((_, regionWithClicks) => regionWithClicks) + + // Compute the total per region by summing the individual click counts per region. + .groupByKey + .reduce(_ + _) + + // Write the (continuously updating) results to the output topic. + clicksPerRegion.toStream.to(outputTopic) + + val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) + streams.start() + + val actualClicksPerRegion: java.util.List[KeyValue[String, Long]] = + produceNConsume(userClicksTopic, userRegionsTopic, outputTopic) + + assertTrue(!actualClicksPerRegion.isEmpty, "Expected to process some data") + + streams.close() + } + + @Test + def testShouldCountClicksPerRegionWithNamedRepartitionTopic(): Unit = { + + // DefaultSerdes brings into scope implicit serdes (mostly for primitives) that will set up all Grouped, Produced, + // Consumed and Joined instances. So all APIs below that accept Grouped, Produced, Consumed or Joined will + // get these instances automatically + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamsConfiguration: Properties = getStreamsConfiguration() + + val builder = new StreamsBuilder() + + val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) + + val userRegionsTable: KTable[String, String] = builder.table(userRegionsTopic) + + // Compute the total per region by summing the individual click counts per region. + val clicksPerRegion: KTable[String, Long] = + userClicksStream + + // Join the stream against the table. + .leftJoin(userRegionsTable)((clicks, region) => (if (region == null) "UNKNOWN" else region, clicks)) + + // Change the stream from <user> -> <region, clicks> to <region> -> <clicks> + .map((_, regionWithClicks) => regionWithClicks) + + // Compute the total per region by summing the individual click counts per region. + .groupByKey + .reduce(_ + _) + + // Write the (continuously updating) results to the output topic.
+ clicksPerRegion.toStream.to(outputTopic) + + val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) + streams.start() + + val actualClicksPerRegion: java.util.List[KeyValue[String, Long]] = + produceNConsume(userClicksTopic, userRegionsTopic, outputTopic) + + assertTrue(!actualClicksPerRegion.isEmpty, "Expected to process some data") + + streams.close() + } + + @Test + def testShouldCountClicksPerRegionJava(): Unit = { + + import java.lang.{Long => JLong} + + import org.apache.kafka.streams.kstream.{KStream => KStreamJ, KTable => KTableJ, _} + import org.apache.kafka.streams.{KafkaStreams => KafkaStreamsJ, StreamsBuilder => StreamsBuilderJ} + + val streamsConfiguration: Properties = getStreamsConfiguration() + + streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) + streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) + + val builder: StreamsBuilderJ = new StreamsBuilderJ() + + val userClicksStream: KStreamJ[String, JLong] = + builder.stream[String, JLong](userClicksTopicJ, Consumed.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) + + val userRegionsTable: KTableJ[String, String] = + builder.table[String, String](userRegionsTopicJ, Consumed.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + + // Join the stream against the table. + val valueJoinerJ: ValueJoiner[JLong, String, (String, JLong)] = + (clicks: JLong, region: String) => (if (region == null) "UNKNOWN" else region, clicks) + val userClicksJoinRegion: KStreamJ[String, (String, JLong)] = userClicksStream.leftJoin( + userRegionsTable, + valueJoinerJ, + Joined.`with`[String, JLong, String](NewSerdes.stringSerde, NewSerdes.javaLongSerde, NewSerdes.stringSerde) + ) + + // Change the stream from <user> -> <region, clicks> to <region> -> <clicks> + val clicksByRegion: KStreamJ[String, JLong] = userClicksJoinRegion.map { (_, regionWithClicks) => + new KeyValue(regionWithClicks._1, regionWithClicks._2) + } + + // Compute the total per region by summing the individual click counts per region. + val clicksPerRegion: KTableJ[String, JLong] = clicksByRegion + .groupByKey(Grouped.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) + .reduce((v1, v2) => v1 + v2) + + // Write the (continuously updating) results to the output topic. + clicksPerRegion.toStream.to(outputTopicJ, Produced.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) + + val streams = new KafkaStreamsJ(builder.build(), streamsConfiguration) + + streams.start() + produceNConsume(userClicksTopicJ, userRegionsTopicJ, outputTopicJ) + streams.close() + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala new file mode 100644 index 0000000000000..b38b0c3a941f3 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala @@ -0,0 +1,482 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import java.time.Duration +import java.util +import java.util.{Locale, Properties} +import java.util.regex.Pattern +import org.apache.kafka.common.serialization.{Serdes => SerdesJ} +import org.apache.kafka.streams.kstream.{ + Aggregator, + Initializer, + JoinWindows, + KeyValueMapper, + Reducer, + Transformer, + ValueJoiner, + ValueMapper, + KGroupedStream => KGroupedStreamJ, + KStream => KStreamJ, + KTable => KTableJ, + Materialized => MaterializedJ, + StreamJoined => StreamJoinedJ +} +import org.apache.kafka.streams.processor.{api, ProcessorContext} +import org.apache.kafka.streams.processor.api.{Processor, ProcessorSupplier} +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.serialization.{Serdes => NewSerdes} +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.scala.kstream._ +import org.apache.kafka.streams.{KeyValue, StreamsConfig, TopologyDescription, StreamsBuilder => StreamsBuilderJ} +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api._ + +import scala.annotation.nowarn +import scala.jdk.CollectionConverters._ + +/** + * Test suite that verifies that the topology built by the Java and Scala APIs match. + */ +//noinspection ScalaDeprecation +@Timeout(600) +class TopologyTest { + private val inputTopic = "input-topic" + private val userClicksTopic = "user-clicks-topic" + private val userRegionsTopic = "user-regions-topic" + + private val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) + + @Test + def shouldBuildIdenticalTopologyInJavaNScalaSimple(): Unit = { + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + + val _: KStream[String, String] = textLines.flatMapValues(v => pattern.split(v.toLowerCase)) + + streamBuilder.build().describe() + } + + // build the Java topology + def getTopologyJava: TopologyDescription = { + val streamBuilder = new StreamsBuilderJ + val textLines = streamBuilder.stream[String, String](inputTopic) + val _: KStreamJ[String, String] = textLines.flatMapValues(s => pattern.split(s.toLowerCase).toBuffer.asJava) + streamBuilder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test + def shouldBuildIdenticalTopologyInJavaNScalaAggregate(): Unit = { + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + + textLines + .flatMapValues(v => pattern.split(v.toLowerCase)) + .groupBy((_, v) => v) + .count() + + streamBuilder.build().describe() + } + + // build the Java topology + def getTopologyJava: TopologyDescription = { + + val streamBuilder = new StreamsBuilderJ + val textLines: KStreamJ[String, String] = 
streamBuilder.stream[String, String](inputTopic) + + val splits: KStreamJ[String, String] = + textLines.flatMapValues(s => pattern.split(s.toLowerCase).toBuffer.asJava) + + val grouped: KGroupedStreamJ[String, String] = splits.groupBy((_, v) => v) + + grouped.count() + + streamBuilder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test def shouldBuildIdenticalTopologyInJavaNScalaCogroupSimple(): Unit = { + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + textLines + .mapValues(v => v.length) + .groupByKey + .cogroup((_, v1, v2: Long) => v1 + v2) + .aggregate(0L) + + streamBuilder.build().describe() + } + + // build the Java topology + def getTopologyJava: TopologyDescription = { + + val streamBuilder = new StreamsBuilderJ + val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) + + val splits: KStreamJ[String, Int] = textLines.mapValues( + new ValueMapper[String, Int] { + def apply(s: String): Int = s.length + } + ) + + splits.groupByKey + .cogroup((k: String, v: Int, a: Long) => a + v) + .aggregate(() => 0L) + + streamBuilder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test def shouldBuildIdenticalTopologyInJavaNScalaCogroup(): Unit = { + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamBuilder = new StreamsBuilder + val textLines1 = streamBuilder.stream[String, String](inputTopic) + val textLines2 = streamBuilder.stream[String, String]("inputTopic2") + + textLines1 + .mapValues(v => v.length) + .groupByKey + .cogroup((_, v1, v2: Long) => v1 + v2) + .cogroup(textLines2.groupByKey, (_, v: String, a) => v.length + a) + .aggregate(0L) + + streamBuilder.build().describe() + } + + // build the Java topology + def getTopologyJava: TopologyDescription = { + + val streamBuilder = new StreamsBuilderJ + val textLines1: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) + val textLines2: KStreamJ[String, String] = streamBuilder.stream[String, String]("inputTopic2") + + val splits: KStreamJ[String, Int] = textLines1.mapValues( + new ValueMapper[String, Int] { + def apply(s: String): Int = s.length + } + ) + + splits.groupByKey + .cogroup((k: String, v: Int, a: Long) => a + v) + .cogroup(textLines2.groupByKey(), (k: String, v: String, a: Long) => v.length + a) + .aggregate(() => 0L) + + streamBuilder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test def shouldBuildIdenticalTopologyInJavaNScalaJoin(): Unit = { + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val builder = new StreamsBuilder() + + val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) + + val userRegionsTable: KTable[String, String] = builder.table(userRegionsTopic) + + // clicks per region + userClicksStream + .leftJoin(userRegionsTable)((clicks, region) => (if (region == null) "UNKNOWN" else region, clicks)) + .map((_, regionWithClicks) => regionWithClicks) + .groupByKey + .reduce(_ + _) + + builder.build().describe() + } + + // build the Java topology + def getTopologyJava: 
TopologyDescription = { + + import java.lang.{Long => JLong} + + val builder: StreamsBuilderJ = new StreamsBuilderJ() + + val userClicksStream: KStreamJ[String, JLong] = + builder.stream[String, JLong](userClicksTopic, Consumed.`with`[String, JLong]) + + val userRegionsTable: KTableJ[String, String] = + builder.table[String, String](userRegionsTopic, Consumed.`with`[String, String]) + + // Join the stream against the table. + val valueJoinerJ: ValueJoiner[JLong, String, (String, JLong)] = + (clicks: JLong, region: String) => (if (region == null) "UNKNOWN" else region, clicks) + val userClicksJoinRegion: KStreamJ[String, (String, JLong)] = userClicksStream.leftJoin( + userRegionsTable, + valueJoinerJ, + Joined.`with`[String, JLong, String] + ) + + // Change the stream from <user> -> <region, clicks> to <region> -> <clicks> + val clicksByRegion: KStreamJ[String, JLong] = userClicksJoinRegion.map { (_, regionWithClicks) => + new KeyValue(regionWithClicks._1, regionWithClicks._2) + } + + // Compute the total per region by summing the individual click counts per region. + clicksByRegion + .groupByKey(Grouped.`with`[String, JLong]) + .reduce((v1, v2) => v1 + v2) + + builder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @nowarn + @Test + def shouldBuildIdenticalTopologyInJavaNScalaTransform(): Unit = { + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + + val _: KTable[String, Long] = textLines + .transform(() => + new Transformer[String, String, KeyValue[String, String]] { + override def init(context: ProcessorContext): Unit = () + override def transform(key: String, value: String): KeyValue[String, String] = + new KeyValue(key, value.toLowerCase) + override def close(): Unit = () + } + ) + .groupBy((_, v) => v) + .count() + + streamBuilder.build().describe() + } + + @nowarn + // build the Java topology + def getTopologyJava: TopologyDescription = { + + val streamBuilder = new StreamsBuilderJ + val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) + + val lowered: KStreamJ[String, String] = textLines.transform(() => + new Transformer[String, String, KeyValue[String, String]] { + override def init(context: ProcessorContext): Unit = () + override def transform(key: String, value: String): KeyValue[String, String] = + new KeyValue(key, value.toLowerCase) + override def close(): Unit = () + } + ) + + val grouped: KGroupedStreamJ[String, String] = lowered.groupBy((_, v) => v) + + // word counts + grouped.count() + + streamBuilder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test + def shouldBuildIdenticalTopologyInJavaNScalaProperties(): Unit = { + + val props = new Properties() + props.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE) + + val propsNoOptimization = new Properties() + propsNoOptimization.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.NO_OPTIMIZATION) + + val AGGREGATION_TOPIC = "aggregationTopic" + val REDUCE_TOPIC = "reduceTopic" + val JOINED_TOPIC = "joinedTopic" + + // build the Scala topology + def getTopologyScala: StreamsBuilder = { + + val aggregator = (_: String, v: String, agg: Int) => agg + v.length + val reducer = (v1: String, v2: String) => v1 + ":" + v2 + val processorValueCollector: util.List[String] = new util.ArrayList[String]
+ + val builder: StreamsBuilder = new StreamsBuilder + + val sourceStream: KStream[String, String] = + builder.stream(inputTopic)(Consumed.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + + val mappedStream: KStream[String, String] = + sourceStream.map((k: String, v: String) => (k.toUpperCase(Locale.getDefault), v)) + mappedStream + .filter((k: String, _: String) => k == "B") + .mapValues((v: String) => v.toUpperCase(Locale.getDefault)) + .process(new SimpleProcessorSupplier(processorValueCollector)) + + val stream2 = mappedStream.groupByKey + .aggregate(0)(aggregator)(Materialized.`with`(NewSerdes.stringSerde, NewSerdes.intSerde)) + .toStream + stream2.to(AGGREGATION_TOPIC)(Produced.`with`(NewSerdes.stringSerde, NewSerdes.intSerde)) + + // adding operators for case where the repartition node is further downstream + val stream3 = mappedStream + .filter((_: String, _: String) => true) + .peek((k: String, v: String) => System.out.println(k + ":" + v)) + .groupByKey + .reduce(reducer)(Materialized.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + .toStream + stream3.to(REDUCE_TOPIC)(Produced.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + + mappedStream + .filter((k: String, _: String) => k == "A") + .join(stream2)( + (v1: String, v2: Int) => v1 + ":" + v2.toString, + JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), Duration.ofHours(24)) + )( + StreamJoined.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, NewSerdes.intSerde) + ) + .to(JOINED_TOPIC) + + mappedStream + .filter((k: String, _: String) => k == "A") + .join(stream3)( + (v1: String, v2: String) => v1 + ":" + v2.toString, + JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), Duration.ofHours(24)) + )( + StreamJoined.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, NewSerdes.stringSerde) + ) + .to(JOINED_TOPIC) + + builder + } + + // build the Java topology + def getTopologyJava: StreamsBuilderJ = { + + val keyValueMapper: KeyValueMapper[String, String, KeyValue[String, String]] = + (key, value) => KeyValue.pair(key.toUpperCase(Locale.getDefault), value) + val initializer: Initializer[Integer] = () => 0 + val aggregator: Aggregator[String, String, Integer] = (_, value, aggregate) => aggregate + value.length + val reducer: Reducer[String] = (v1, v2) => v1 + ":" + v2 + val valueMapper: ValueMapper[String, String] = v => v.toUpperCase(Locale.getDefault) + val processorValueCollector = new util.ArrayList[String] + val processorSupplier = new SimpleProcessorSupplier(processorValueCollector) + val valueJoiner2: ValueJoiner[String, Integer, String] = (value1, value2) => value1 + ":" + value2.toString + val valueJoiner3: ValueJoiner[String, String, String] = (value1, value2) => value1 + ":" + value2 + + val builder = new StreamsBuilderJ + + val sourceStream = builder.stream(inputTopic, Consumed.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + + val mappedStream: KStreamJ[String, String] = + sourceStream.map(keyValueMapper) + mappedStream + .filter((key, _) => key == "B") + .mapValues[String](valueMapper) + .process(processorSupplier) + + val stream2: KStreamJ[String, Integer] = mappedStream.groupByKey + .aggregate(initializer, aggregator, MaterializedJ.`with`(NewSerdes.stringSerde, SerdesJ.Integer)) + .toStream + stream2.to(AGGREGATION_TOPIC, Produced.`with`(NewSerdes.stringSerde, SerdesJ.Integer)) + + // adding operators for case where the repartition node is further downstream + val stream3 = mappedStream + .filter((_, _) => true) + .peek((k, v) => System.out.println(k + ":" + v)) + 
.groupByKey + .reduce(reducer, MaterializedJ.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + .toStream + stream3.to(REDUCE_TOPIC, Produced.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + + mappedStream + .filter((key, _) => key == "A") + .join[Integer, String]( + stream2, + valueJoiner2, + JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), Duration.ofHours(24)), + StreamJoinedJ.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, SerdesJ.Integer) + ) + .to(JOINED_TOPIC) + + mappedStream + .filter((key, _) => key == "A") + .join( + stream3, + valueJoiner3, + JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), Duration.ofHours(24)), + StreamJoinedJ.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, SerdesJ.String) + ) + .to(JOINED_TOPIC) + + builder + } + + assertNotEquals( + getTopologyScala.build(props).describe.toString, + getTopologyScala.build(propsNoOptimization).describe.toString + ) + assertEquals( + getTopologyScala.build(propsNoOptimization).describe.toString, + getTopologyJava.build(propsNoOptimization).describe.toString + ) + assertEquals(getTopologyScala.build(props).describe.toString, getTopologyJava.build(props).describe.toString) + } + + private class SimpleProcessorSupplier private[TopologyTest] (val valueList: util.List[String]) + extends ProcessorSupplier[String, String, Void, Void] { + + override def get(): Processor[String, String, Void, Void] = + (record: api.Record[String, String]) => valueList.add(record.value()) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/WordCountTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/WordCountTest.scala new file mode 100644 index 0000000000000..bd75354204f18 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/WordCountTest.scala @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala + +import java.util.Properties +import java.util.regex.Pattern +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api._ +import org.apache.kafka.streams.scala.serialization.{Serdes => NewSerdes} +import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} +import org.apache.kafka.streams.scala.kstream._ +import org.apache.kafka.streams.integration.utils.{EmbeddedKafkaCluster, IntegrationTestUtils} +import org.apache.kafka.clients.consumer.ConsumerConfig +import org.apache.kafka.clients.producer.ProducerConfig +import org.apache.kafka.common.utils.{MockTime, Utils} +import ImplicitConversions._ +import org.apache.kafka.common.serialization.{LongDeserializer, StringDeserializer, StringSerializer} +import org.apache.kafka.test.TestUtils +import org.junit.jupiter.api.Tag + +import java.io.File + +/** + * Test suite that does a classic word count example. + *
+ * The suite contains the test case using Scala APIs `testShouldCountWords` and the same test case using the + * Java APIs `testShouldCountWordsJava`. The idea is to demonstrate that both generate the same result. + */ +@Tag("integration") +class WordCountTest extends WordCountTestData { + + private val cluster: EmbeddedKafkaCluster = new EmbeddedKafkaCluster(1) + + final private val alignedTime = (System.currentTimeMillis() / 1000 + 1) * 1000 + private val mockTime: MockTime = cluster.time + mockTime.setCurrentTimeMs(alignedTime) + + private val testFolder: File = TestUtils.tempDirectory() + + @BeforeEach + def startKafkaCluster(): Unit = { + cluster.start() + cluster.createTopic(inputTopic) + cluster.createTopic(outputTopic) + cluster.createTopic(inputTopicJ) + cluster.createTopic(outputTopicJ) + } + + @AfterEach + def stopKafkaCluster(): Unit = { + cluster.stop() + Utils.delete(testFolder) + } + + @Test + def testShouldCountWords(): Unit = { + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamsConfiguration = getStreamsConfiguration() + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + + val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) + + // generate word counts + val wordCounts: KTable[String, Long] = + textLines + .flatMapValues(v => pattern.split(v.toLowerCase)) + .groupBy((_, v) => v) + .count() + + // write to output topic + wordCounts.toStream.to(outputTopic) + + val streams = new KafkaStreams(streamBuilder.build(), streamsConfiguration) + streams.start() + + // produce and consume synchronously + val actualWordCounts: java.util.List[KeyValue[String, Long]] = produceNConsume(inputTopic, outputTopic) + + streams.close() + + import scala.jdk.CollectionConverters._ + assertEquals(actualWordCounts.asScala.take(expectedWordCounts.size).sortBy(_.key), expectedWordCounts.sortBy(_.key)) + } + + @Test + def testShouldCountWordsMaterialized(): Unit = { + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamsConfiguration = getStreamsConfiguration() + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + + val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) + + // generate word counts + val wordCounts: KTable[String, Long] = + textLines + .flatMapValues(v => pattern.split(v.toLowerCase)) + .groupBy((k, v) => v) + .count()(Materialized.as("word-count")) + + // write to output topic + wordCounts.toStream.to(outputTopic) + + val streams = new KafkaStreams(streamBuilder.build(), streamsConfiguration) + streams.start() + + // produce and consume synchronously + val actualWordCounts: java.util.List[KeyValue[String, Long]] = produceNConsume(inputTopic, outputTopic) + + streams.close() + + import scala.jdk.CollectionConverters._ + assertEquals(actualWordCounts.asScala.take(expectedWordCounts.size).sortBy(_.key), expectedWordCounts.sortBy(_.key)) + } + + @Test + def testShouldCountWordsJava(): Unit = { + + import org.apache.kafka.streams.{KafkaStreams => KafkaStreamsJ, StreamsBuilder => StreamsBuilderJ} + import org.apache.kafka.streams.kstream.{ + KTable => KTableJ, + KStream => KStreamJ, + KGroupedStream => KGroupedStreamJ, + _ + } + import scala.jdk.CollectionConverters._ + + val streamsConfiguration = getStreamsConfiguration() + streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) + 
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) + + val streamBuilder = new StreamsBuilderJ + val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopicJ) + + val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) + + val splits: KStreamJ[String, String] = textLines.flatMapValues { line => + pattern.split(line.toLowerCase).toBuffer.asJava + } + + val grouped: KGroupedStreamJ[String, String] = splits.groupBy { (_, v) => + v + } + + val wordCounts: KTableJ[String, java.lang.Long] = grouped.count() + + wordCounts.toStream.to(outputTopicJ, Produced.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) + + val streams: KafkaStreamsJ = new KafkaStreamsJ(streamBuilder.build(), streamsConfiguration) + streams.start() + + val actualWordCounts: java.util.List[KeyValue[String, Long]] = produceNConsume(inputTopicJ, outputTopicJ) + + streams.close() + + assertEquals(actualWordCounts.asScala.take(expectedWordCounts.size).sortBy(_.key), expectedWordCounts.sortBy(_.key)) + } + + private def getStreamsConfiguration(): Properties = { + val streamsConfiguration: Properties = new Properties() + + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-test") + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "10000") + streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, testFolder.getPath) + streamsConfiguration + } + + private def getProducerConfig(): Properties = { + val p = new Properties() + p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + p.put(ProducerConfig.ACKS_CONFIG, "all") + p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) + p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) + p + } + + private def getConsumerConfig(): Properties = { + val p = new Properties() + p.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + p.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-scala-integration-test-standard-consumer") + p.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") + p.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer]) + p.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[LongDeserializer]) + p + } + + private def produceNConsume(inputTopic: String, outputTopic: String): java.util.List[KeyValue[String, Long]] = { + + val linesProducerConfig: Properties = getProducerConfig() + + import scala.jdk.CollectionConverters._ + IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues.asJava, linesProducerConfig, mockTime) + + val consumerConfig = getConsumerConfig() + + IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedWordCounts.size) + } +} + +trait WordCountTestData { + val inputTopic = s"inputTopic" + val outputTopic = s"outputTopic" + val inputTopicJ = s"inputTopicJ" + val outputTopicJ = s"outputTopicJ" + + val inputValues = List( + "Hello Kafka Streams", + "All streams lead to Kafka", + "Join Kafka Summit", + "И теперь пошли русские слова" + ) + + val expectedWordCounts: List[KeyValue[String, Long]] = List( + new KeyValue("hello", 1L), + new KeyValue("all", 1L), + new KeyValue("streams", 2L), + new KeyValue("lead", 1L), + new KeyValue("to", 1L), + new KeyValue("join", 1L), + new 
KeyValue("kafka", 3L), + new KeyValue("summit", 1L), + new KeyValue("и", 1L), + new KeyValue("теперь", 1L), + new KeyValue("пошли", 1L), + new KeyValue("русские", 1L), + new KeyValue("слова", 1L) + ) +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala new file mode 100644 index 0000000000000..0b44165164b93 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.Topology +import org.apache.kafka.streams.kstream.internals.ConsumedInternal +import org.apache.kafka.streams.processor.FailOnInvalidTimestamp +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class ConsumedTest { + + @Test + def testCreateConsumed(): Unit = { + val consumed: Consumed[String, Long] = Consumed.`with`[String, Long] + + val internalConsumed = new ConsumedInternal(consumed) + assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) + } + + @Test + def testCreateConsumedWithTimestampExtractorAndResetPolicy(): Unit = { + val timestampExtractor = new FailOnInvalidTimestamp() + val resetPolicy = Topology.AutoOffsetReset.LATEST + val consumed: Consumed[String, Long] = + Consumed.`with`[String, Long](timestampExtractor, resetPolicy) + + val internalConsumed = new ConsumedInternal(consumed) + assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) + assertEquals(timestampExtractor, internalConsumed.timestampExtractor) + assertEquals(resetPolicy, internalConsumed.offsetResetPolicy) + } + + @Test + def testCreateConsumedWithTimestampExtractor(): Unit = { + val timestampExtractor = new FailOnInvalidTimestamp() + val consumed: Consumed[String, Long] = Consumed.`with`[String, Long](timestampExtractor) + + val internalConsumed = new ConsumedInternal(consumed) + assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) + assertEquals(timestampExtractor, internalConsumed.timestampExtractor) + } + @Test + def testCreateConsumedWithResetPolicy(): Unit = { + val resetPolicy = Topology.AutoOffsetReset.LATEST + val consumed: Consumed[String, Long] = Consumed.`with`[String, Long](resetPolicy) + 
+ val internalConsumed = new ConsumedInternal(consumed) + assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) + assertEquals(resetPolicy, internalConsumed.offsetResetPolicy) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala new file mode 100644 index 0000000000000..02f333ec6b1db --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.internals.GroupedInternal +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class GroupedTest { + + @Test + def testCreateGrouped(): Unit = { + val grouped: Grouped[String, Long] = Grouped.`with`[String, Long] + + val internalGrouped = new GroupedInternal[String, Long](grouped) + assertEquals(Serdes.stringSerde.getClass, internalGrouped.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalGrouped.valueSerde.getClass) + } + + @Test + def testCreateGroupedWithRepartitionTopicName(): Unit = { + val repartitionTopicName = "repartition-topic" + val grouped: Grouped[String, Long] = Grouped.`with`(repartitionTopicName) + + val internalGrouped = new GroupedInternal[String, Long](grouped) + assertEquals(Serdes.stringSerde.getClass, internalGrouped.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalGrouped.valueSerde.getClass) + assertEquals(repartitionTopicName, internalGrouped.name()) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala new file mode 100644 index 0000000000000..4e6fa563f57d0 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class JoinedTest { + + @Test + def testCreateJoined(): Unit = { + val joined: Joined[String, Long, Int] = Joined.`with`[String, Long, Int] + + assertEquals(joined.keySerde.getClass, Serdes.stringSerde.getClass) + assertEquals(joined.valueSerde.getClass, Serdes.longSerde.getClass) + assertEquals(joined.otherValueSerde.getClass, Serdes.intSerde.getClass) + } + + @Test + def testCreateJoinedWithSerdesAndRepartitionTopicName(): Unit = { + val repartitionTopicName = "repartition-topic" + val joined: Joined[String, Long, Int] = Joined.`with`(repartitionTopicName) + + assertEquals(joined.keySerde.getClass, Serdes.stringSerde.getClass) + assertEquals(joined.valueSerde.getClass, Serdes.longSerde.getClass) + assertEquals(joined.otherValueSerde.getClass, Serdes.intSerde.getClass) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala new file mode 100644 index 0000000000000..bbcc1b503f4fa --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
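For illustration (not part of the patch), the implicit resolution that JoinedTest above verifies is what lets a stream-table join be written without naming any serde; the topic names and value types here are assumptions:

import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.kstream._
import org.apache.kafka.streams.scala.serialization.Serdes._

object JoinedSketch extends App {
  val builder = new StreamsBuilder
  val orders: KStream[String, Long] = builder.stream[String, Long]("orders") // hypothetical topic
  val users: KTable[String, String] = builder.table[String, String]("users") // hypothetical topic

  // The Joined[String, Long, String] needed by leftJoin is summoned implicitly
  // from the String and Long serdes imported above.
  orders
    .leftJoin(users)((amount, user) => s"${if (user == null) "UNKNOWN" else user}:$amount")
    .to("orders-enriched") // Produced[String, String] is likewise summoned implicitly

  println(builder.build().describe())
}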
+ */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.Named +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.StreamsBuilder +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.scala.utils.TestDriver +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.Test + +import scala.jdk.CollectionConverters._ + +class KStreamSplitTest extends TestDriver { + + @Test + def testRouteMessagesAccordingToPredicates(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = Array("default", "even", "three"); + + val m = builder + .stream[Integer, Integer](sourceTopic) + .split(Named.as("_")) + .branch((_, v) => v % 2 == 0) + .branch((_, v) => v % 3 == 0) + .defaultBranch() + + m("_0").to(sinkTopic(0)) + m("_1").to(sinkTopic(1)) + m("_2").to(sinkTopic(2)) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[Integer, Integer](sourceTopic) + val testOutput = sinkTopic.map(name => testDriver.createOutput[Integer, Integer](name)) + + testInput.pipeValueList( + List(1, 2, 3, 4, 5) + .map(Integer.valueOf) + .asJava + ) + assertEquals(List(1, 5), testOutput(0).readValuesToList().asScala) + assertEquals(List(2, 4), testOutput(1).readValuesToList().asScala) + assertEquals(List(3), testOutput(2).readValuesToList().asScala) + + testDriver.close() + } + + @Test + def testRouteMessagesToConsumers(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + + val m = builder + .stream[Integer, Integer](sourceTopic) + .split(Named.as("_")) + .branch((_, v) => v % 2 == 0, Branched.withConsumer(ks => ks.to("even"), "consumedEvens")) + .branch((_, v) => v % 3 == 0, Branched.withFunction(ks => ks.mapValues(x => x * x), "mapped")) + .noDefaultBranch() + + m("_mapped").to("mapped") + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[Integer, Integer](sourceTopic) + testInput.pipeValueList( + List(1, 2, 3, 4, 5, 9) + .map(Integer.valueOf) + .asJava + ) + + val even = testDriver.createOutput[Integer, Integer]("even") + val mapped = testDriver.createOutput[Integer, Integer]("mapped") + + assertEquals(List(2, 4), even.readValuesToList().asScala) + assertEquals(List(9, 81), mapped.readValuesToList().asScala) + + testDriver.close() + } + + @Test + def testRouteMessagesToAnonymousConsumers(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + + val m = builder + .stream[Integer, Integer](sourceTopic) + .split(Named.as("_")) + .branch((_, v) => v % 2 == 0, Branched.withConsumer(ks => ks.to("even"))) + .branch((_, v) => v % 3 == 0, Branched.withFunction(ks => ks.mapValues(x => x * x))) + .noDefaultBranch() + + m("_2").to("mapped") + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[Integer, Integer](sourceTopic) + testInput.pipeValueList( + List(1, 2, 3, 4, 5, 9) + .map(Integer.valueOf) + .asJava + ) + + val even = testDriver.createOutput[Integer, Integer]("even") + val mapped = testDriver.createOutput[Integer, Integer]("mapped") + + assertEquals(List(2, 4), even.readValuesToList().asScala) + assertEquals(List(9, 81), mapped.readValuesToList().asScala) + + testDriver.close() + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala new file mode 100644 index 
0000000000000..0b61984a96ab7 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala @@ -0,0 +1,474 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import java.time.Duration.ofSeconds +import java.time.{Duration, Instant} +import org.apache.kafka.streams.KeyValue +import org.apache.kafka.streams.kstream.{ + JoinWindows, + Named, + Transformer, + ValueTransformer, + ValueTransformerSupplier, + ValueTransformerWithKey, + ValueTransformerWithKeySupplier +} +import org.apache.kafka.streams.processor.ProcessorContext +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.scala.StreamsBuilder +import org.apache.kafka.streams.scala.utils.TestDriver +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +import scala.annotation.nowarn +import scala.jdk.CollectionConverters._ + +class KStreamTest extends TestDriver { + + @Test + def testFilterRecordsSatisfyingPredicate(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + builder.stream[String, String](sourceTopic).filter((_, value) => value != "value2").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value1") + assertEquals("value1", testOutput.readValue) + + testInput.pipeInput("2", "value2") + assertTrue(testOutput.isEmpty) + + testInput.pipeInput("3", "value3") + assertEquals("value3", testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testFilterRecordsNotSatisfyingPredicate(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + builder.stream[String, String](sourceTopic).filterNot((_, value) => value == "value2").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value1") + assertEquals("value1", testOutput.readValue) + + testInput.pipeInput("2", "value2") + assertTrue(testOutput.isEmpty) + + testInput.pipeInput("3", "value3") + assertEquals("value3", testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testForeachActionsOnRecords(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + + var acc = "" + builder.stream[String, String](sourceTopic).foreach((_, value) 
=> acc += value) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + + testInput.pipeInput("1", "value1") + assertEquals("value1", acc) + + testInput.pipeInput("2", "value2") + assertEquals("value1value2", acc) + + testDriver.close() + } + + @Test + def testPeekActionsOnRecords(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + var acc = "" + builder.stream[String, String](sourceTopic).peek((_, v) => acc += v).to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value1") + assertEquals("value1", acc) + assertEquals("value1", testOutput.readValue) + + testInput.pipeInput("2", "value2") + assertEquals("value1value2", acc) + assertEquals("value2", testOutput.readValue) + + testDriver.close() + } + + @Test + def testSelectNewKey(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + builder.stream[String, String](sourceTopic).selectKey((_, value) => value).to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value1") + assertEquals("value1", testOutput.readKeyValue.key) + + testInput.pipeInput("1", "value2") + assertEquals("value2", testOutput.readKeyValue.key) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testRepartitionKStream(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val repartitionName = "repartition" + val sinkTopic = "sink" + + builder.stream[String, String](sourceTopic).repartition(Repartitioned.`with`(repartitionName)).to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value1") + val kv1 = testOutput.readKeyValue + assertEquals("1", kv1.key) + assertEquals("value1", kv1.value) + + testInput.pipeInput("2", "value2") + val kv2 = testOutput.readKeyValue + assertEquals("2", kv2.key) + assertEquals("value2", kv2.value) + + assertTrue(testOutput.isEmpty) + + // appId == "test", so the repartition topic name is prefixed with "test-" + assertTrue(testDriver.producedTopicNames() contains "test-" + repartitionName + "-repartition") + + testDriver.close() + } + + //noinspection ScalaDeprecation + @Test + def testJoinCorrectlyRecords(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + + val stream1 = builder.stream[String, String](sourceTopic1) + val stream2 = builder.stream[String, String](sourceTopic2) + stream1 + .join(stream2)((a, b) => s"$a-$b", JoinWindows.ofTimeDifferenceAndGrace(ofSeconds(1), Duration.ofHours(24))) + .to(sinkTopic) + + val now = Instant.now() + + val testDriver = createTestDriver(builder, now) + val testInput1 = testDriver.createInput[String, String](sourceTopic1) + val testInput2 = testDriver.createInput[String, String](sourceTopic2) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput1.pipeInput("1", "topic1value1", now) + testInput2.pipeInput("1", "topic2value1", now) + + assertEquals("topic1value1-topic2value1", testOutput.readValue) + +
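+ // a single pair of records falling inside the one-second join window yields exactly one joined value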
assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @nowarn + @Test + def testTransformCorrectlyRecords(): Unit = { + class TestTransformer extends Transformer[String, String, KeyValue[String, String]] { + override def init(context: ProcessorContext): Unit = {} + + override def transform(key: String, value: String): KeyValue[String, String] = + new KeyValue(s"$key-transformed", s"$value-transformed") + + override def close(): Unit = {} + } + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val stream = builder.stream[String, String](sourceTopic) + stream + .transform(() => new TestTransformer) + .to(sinkTopic) + + val now = Instant.now() + val testDriver = createTestDriver(builder, now) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value", now) + + val result = testOutput.readKeyValue() + assertEquals("value-transformed", result.value) + assertEquals("1-transformed", result.key) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @nowarn + @Test + def testFlatTransformCorrectlyRecords(): Unit = { + class TestTransformer extends Transformer[String, String, Iterable[KeyValue[String, String]]] { + override def init(context: ProcessorContext): Unit = {} + + override def transform(key: String, value: String): Iterable[KeyValue[String, String]] = + Array(new KeyValue(s"$key-transformed", s"$value-transformed")) + + override def close(): Unit = {} + } + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val stream = builder.stream[String, String](sourceTopic) + stream + .flatTransform(() => new TestTransformer) + .to(sinkTopic) + + val now = Instant.now() + val testDriver = createTestDriver(builder, now) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value", now) + + val result = testOutput.readKeyValue() + assertEquals("value-transformed", result.value) + assertEquals("1-transformed", result.key) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @nowarn + @Test + def testCorrectlyFlatTransformValuesInRecords(): Unit = { + class TestTransformer extends ValueTransformer[String, Iterable[String]] { + override def init(context: ProcessorContext): Unit = {} + + override def transform(value: String): Iterable[String] = + Array(s"$value-transformed") + + override def close(): Unit = {} + } + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val stream = builder.stream[String, String](sourceTopic) + stream + .flatTransformValues(new ValueTransformerSupplier[String, Iterable[String]] { + def get(): ValueTransformer[String, Iterable[String]] = + new TestTransformer + }) + .to(sinkTopic) + + val now = Instant.now() + val testDriver = createTestDriver(builder, now) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value", now) + + assertEquals("value-transformed", testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @nowarn + @Test + def testCorrectlyFlatTransformValuesInRecordsWithKey(): Unit = { + class TestTransformer extends ValueTransformerWithKey[String, String, Iterable[String]] { + override def init(context: ProcessorContext): Unit = {} + 
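+ // the transform below appends the record key to each transformed value so the test can assert key access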
+ override def transform(key: String, value: String): Iterable[String] = + Array(s"$value-transformed-$key") + + override def close(): Unit = {} + } + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val stream = builder.stream[String, String](sourceTopic) + stream + .flatTransformValues(new ValueTransformerWithKeySupplier[String, String, Iterable[String]] { + def get(): ValueTransformerWithKey[String, String, Iterable[String]] = + new TestTransformer + }) + .to(sinkTopic) + + val now = Instant.now() + val testDriver = createTestDriver(builder, now) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value", now) + + assertEquals("value-transformed-1", testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testJoinTwoKStreamToTables(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + + val table1 = builder.stream[String, String](sourceTopic1).toTable + val table2 = builder.stream[String, String](sourceTopic2).toTable + table1.join(table2)((a, b) => a + b).toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput1 = testDriver.createInput[String, String](sourceTopic1) + val testInput2 = testDriver.createInput[String, String](sourceTopic2) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput1.pipeInput("1", "topic1value1") + testInput2.pipeInput("1", "topic2value1") + + assertEquals("topic1value1topic2value1", testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testSettingNameOnFilter(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + builder + .stream[String, String](sourceTopic) + .filter((_, value) => value != "value2", Named.as("my-name")) + .to(sinkTopic) + + import scala.jdk.CollectionConverters._ + + val filterNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) + assertEquals("my-name", filterNode.name()) + } + + @Test + def testSettingNameOnOutputTable(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sinkTopic = "sink" + + builder + .stream[String, String](sourceTopic1) + .toTable(Named.as("my-name")) + .toStream + .to(sinkTopic) + + import scala.jdk.CollectionConverters._ + + val tableNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) + assertEquals("my-name", tableNode.name()) + } + + @Test + def testSettingNameOnJoin(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source" + val sourceGTable = "table" + val sinkTopic = "sink" + + val stream = builder.stream[String, String](sourceTopic1) + val table = builder.globalTable[String, String](sourceGTable) + stream + .join(table, Named.as("my-name"))((a, b) => s"$a-$b", (a, b) => a + b) + .to(sinkTopic) + + import scala.jdk.CollectionConverters._ + + val joinNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) + assertEquals("my-name", joinNode.name()) + } + + @nowarn + @Test + def testSettingNameOnTransform(): Unit = { + class TestTransformer extends Transformer[String, String, KeyValue[String, String]] { + override def init(context: ProcessorContext): Unit = {} + + override def transform(key: String, value: String): 
KeyValue[String, String] = + new KeyValue(s"$key-transformed", s"$value-transformed") + + override def close(): Unit = {} + } + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val stream = builder.stream[String, String](sourceTopic) + stream + .transform(() => new TestTransformer, Named.as("my-name")) + .to(sinkTopic) + + val transformNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) + assertEquals("my-name", transformNode.name()) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala new file mode 100644 index 0000000000000..9e872601ef131 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala @@ -0,0 +1,537 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.Suppressed.BufferConfig +import org.apache.kafka.streams.kstream.{ + Named, + SlidingWindows, + SessionWindows, + TimeWindows, + Windowed, + Suppressed => JSuppressed +} +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.scala.utils.TestDriver +import org.apache.kafka.streams.scala.{ByteArrayKeyValueStore, StreamsBuilder} +import org.junit.jupiter.api.Assertions.{assertEquals, assertNull, assertTrue} +import org.junit.jupiter.api.Test +import java.time.Duration +import java.time.Duration.ofMillis + +import scala.jdk.CollectionConverters._ + +//noinspection ScalaDeprecation +class KTableTest extends TestDriver { + + @Test + def testFilterRecordsSatisfyingPredicate(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count() + table.filter((key, value) => key.equals("a") && value == 1).toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + testInput.pipeInput("a", "passes filter : add new row to table") + val record = testOutput.readKeyValue + assertEquals("a", record.key) + assertEquals(1, record.value) + } + { + testInput.pipeInput("a", "fails filter : remove existing row from table") + val record = testOutput.readKeyValue + assertEquals("a", record.key) + assertNull(record.value) + } + { + testInput.pipeInput("b", "fails filter : no output") + assertTrue(testOutput.isEmpty) + } + assertTrue(testOutput.isEmpty) + + 
testDriver.close() + } + + @Test + def testFilterRecordsNotSatisfyingPredicate(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count() + table.filterNot((_, value) => value > 1).toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + testInput.pipeInput("1", "value1") + val record = testOutput.readKeyValue + assertEquals("1", record.key) + assertEquals(1, record.value) + } + { + testInput.pipeInput("1", "value2") + val record = testOutput.readKeyValue + assertEquals("1", record.key) + assertNull(record.value) + } + { + testInput.pipeInput("2", "value1") + val record = testOutput.readKeyValue + assertEquals("2", record.key) + assertEquals(1, record.value) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testJoinCorrectlyRecords(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + + val table1 = builder.stream[String, String](sourceTopic1).groupBy((key, _) => key).count() + val table2 = builder.stream[String, String](sourceTopic2).groupBy((key, _) => key).count() + table1.join(table2)((a, b) => a + b).toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput1 = testDriver.createInput[String, String](sourceTopic1) + val testInput2 = testDriver.createInput[String, String](sourceTopic2) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + testInput1.pipeInput("1", "topic1value1") + testInput2.pipeInput("1", "topic2value1") + assertEquals(2, testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testJoinCorrectlyRecordsAndStateStore(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + val stateStore = "store" + val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore) + + val table1 = builder.stream[String, String](sourceTopic1).groupBy((key, _) => key).count() + val table2 = builder.stream[String, String](sourceTopic2).groupBy((key, _) => key).count() + table1.join(table2, materialized)((a, b) => a + b).toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput1 = testDriver.createInput[String, String](sourceTopic1) + val testInput2 = testDriver.createInput[String, String](sourceTopic2) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + testInput1.pipeInput("1", "topic1value1") + testInput2.pipeInput("1", "topic2value1") + assertEquals(2, testOutput.readValue) + assertEquals(2, testDriver.getKeyValueStore[String, Long](stateStore).get("1")) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testCorrectlySuppressResultsUsingSuppressedUntilTimeLimit(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + val window = TimeWindows.ofSizeAndGrace(Duration.ofSeconds(1L), Duration.ofHours(24)) + val suppression = JSuppressed.untilTimeLimit[Windowed[String]](Duration.ofSeconds(2L), BufferConfig.unbounded()) + + val table: KTable[Windowed[String], Long] = builder + .stream[String, String](sourceTopic) + .groupByKey + .windowedBy(window) + .count() + .suppress(suppression) + 
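+ // untilTimeLimit holds each windowed count in the suppression buffer for up to two seconds of stream time before emitting it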
+ table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + // publish key=1 @ time 0 => count==1 + testInput.pipeInput("1", "value1", 0L) + assertTrue(testOutput.isEmpty) + } + { + // publish key=1 @ time 1 => count==2 + testInput.pipeInput("1", "value2", 1L) + assertTrue(testOutput.isEmpty) + } + { + // move event time past the first window, but before the suppression window + testInput.pipeInput("2", "value1", 1001L) + assertTrue(testOutput.isEmpty) + } + { + // move event time riiiight before suppression window ends + testInput.pipeInput("2", "value2", 1999L) + assertTrue(testOutput.isEmpty) + } + { + // publish a late event before suppression window terminates => count==3 + testInput.pipeInput("1", "value3", 999L) + assertTrue(testOutput.isEmpty) + } + { + // move event time right past the suppression window of the first window. + testInput.pipeInput("2", "value3", 2001L) + val record = testOutput.readKeyValue + assertEquals("0:1000:1", record.key) + assertEquals(3L, record.value) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testCorrectlyGroupByKeyWindowedBySlidingWindow(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + val window = SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(1000L), ofMillis(1000L)) + val suppression = JSuppressed.untilWindowCloses(BufferConfig.unbounded()) + + val table: KTable[Windowed[String], Long] = builder + .stream[String, String](sourceTopic) + .groupByKey + .windowedBy(window) + .count() + .suppress(suppression) + + table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + // publish key=1 @ time 0 => count==1 + testInput.pipeInput("1", "value1", 0L) + assertTrue(testOutput.isEmpty) + } + { + // move event time right past the grace period of the first window. 
+ testInput.pipeInput("2", "value3", 5001L) + val record = testOutput.readKeyValue + assertEquals("0:1000:1", record.key) + assertEquals(1L, record.value) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testCorrectlySuppressResultsUsingSuppressedUntilWindowClosesByWindowed(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + val window = TimeWindows.ofSizeAndGrace(Duration.ofSeconds(1L), Duration.ofSeconds(1L)) + val suppression = JSuppressed.untilWindowCloses(BufferConfig.unbounded()) + + val table: KTable[Windowed[String], Long] = builder + .stream[String, String](sourceTopic) + .groupByKey + .windowedBy(window) + .count() + .suppress(suppression) + + table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + // publish key=1 @ time 0 => count==1 + testInput.pipeInput("1", "value1", 0L) + assertTrue(testOutput.isEmpty) + } + { + // publish key=1 @ time 1 => count==2 + testInput.pipeInput("1", "value2", 1L) + assertTrue(testOutput.isEmpty) + } + { + // move event time past the window, but before the grace period + testInput.pipeInput("2", "value1", 1001L) + assertTrue(testOutput.isEmpty) + } + { + // move event time riiiight before grace period ends + testInput.pipeInput("2", "value2", 1999L) + assertTrue(testOutput.isEmpty) + } + { + // publish a late event before grace period terminates => count==3 + testInput.pipeInput("1", "value3", 999L) + assertTrue(testOutput.isEmpty) + } + { + // move event time right past the grace period of the first window. 
+ testInput.pipeInput("2", "value3", 2001L) + val record = testOutput.readKeyValue + assertEquals("0:1000:1", record.key) + assertEquals(3L, record.value) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testCorrectlySuppressResultsUsingSuppressedUntilWindowClosesBySession(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + // Very similar to SuppressScenarioTest.shouldSupportFinalResultsForSessionWindows + val window = SessionWindows.ofInactivityGapAndGrace(Duration.ofMillis(5L), Duration.ofMillis(10L)) + val suppression = JSuppressed.untilWindowCloses(BufferConfig.unbounded()) + + val table: KTable[Windowed[String], Long] = builder + .stream[String, String](sourceTopic) + .groupByKey + .windowedBy(window) + .count() + .suppress(suppression) + + table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + // first window + testInput.pipeInput("k1", "v1", 0L) + assertTrue(testOutput.isEmpty) + } + { + // first window + testInput.pipeInput("k1", "v1", 1L) + assertTrue(testOutput.isEmpty) + } + { + // new window, but grace period hasn't ended for first window + testInput.pipeInput("k1", "v1", 8L) + assertTrue(testOutput.isEmpty) + } + { + // out-of-order event for first window, included since grade period hasn't passed + testInput.pipeInput("k1", "v1", 2L) + assertTrue(testOutput.isEmpty) + } + { + // add to second window + testInput.pipeInput("k1", "v1", 13L) + assertTrue(testOutput.isEmpty) + } + { + // add out-of-order to second window + testInput.pipeInput("k1", "v1", 10L) + assertTrue(testOutput.isEmpty) + } + { + // push stream time forward to flush other events through + testInput.pipeInput("k1", "v1", 30L) + // late event should get dropped from the stream + testInput.pipeInput("k1", "v1", 3L) + // should now have to results + val r1 = testOutput.readRecord + assertEquals("0:2:k1", r1.key) + assertEquals(3L, r1.value) + assertEquals(2L, r1.timestamp) + val r2 = testOutput.readRecord + assertEquals("8:13:k1", r2.key) + assertEquals(3L, r2.value) + assertEquals(13L, r2.timestamp) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testCorrectlySuppressResultsUsingSuppressedUntilTimeLimtByNonWindowed(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + val suppression = JSuppressed.untilTimeLimit[String](Duration.ofSeconds(2L), BufferConfig.unbounded()) + + val table: KTable[String, Long] = builder + .stream[String, String](sourceTopic) + .groupByKey + .count() + .suppress(suppression) + + table.toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + // publish key=1 @ time 0 => count==1 + testInput.pipeInput("1", "value1", 0L) + assertTrue(testOutput.isEmpty) + } + { + // publish key=1 @ time 1 => count==2 + testInput.pipeInput("1", "value2", 1L) + assertTrue(testOutput.isEmpty) + } + { + // move event time past the window, but before the grace period + testInput.pipeInput("2", "value1", 1001L) + assertTrue(testOutput.isEmpty) + } + { + // move event time right before grace period ends + testInput.pipeInput("2", "value2", 1999L) + 
assertTrue(testOutput.isEmpty) + } + { + // publish a late event before grace period terminates => count==3 + testInput.pipeInput("1", "value3", 999L) + assertTrue(testOutput.isEmpty) + } + { + // move event time right past the grace period of the first window. + testInput.pipeInput("2", "value3", 2001L) + val record = testOutput.readKeyValue + assertEquals("1", record.key) + assertEquals(3L, record.value) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testSettingNameOnFilterProcessor(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count() + table + .filter((key, value) => key.equals("a") && value == 1, Named.as("my-name")) + .toStream + .to(sinkTopic) + + import scala.jdk.CollectionConverters._ + + val filterNode = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(3) + assertEquals("my-name", filterNode.name()) + } + + @Test + def testSettingNameOnCountProcessor(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count(Named.as("my-name")) + table.toStream.to(sinkTopic) + + import scala.jdk.CollectionConverters._ + + val countNode = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(1) + assertEquals("my-name", countNode.name()) + } + + @Test + def testSettingNameOnJoinProcessor(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + + val table1 = builder.stream[String, String](sourceTopic1).groupBy((key, _) => key).count() + val table2 = builder.stream[String, String](sourceTopic2).groupBy((key, _) => key).count() + table1 + .join(table2, Named.as("my-name"))((a, b) => a + b) + .toStream + .to(sinkTopic) + + val joinNodeLeft = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(6) + val joinNodeRight = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(7) + assertTrue(joinNodeLeft.name().contains("my-name")) + assertTrue(joinNodeRight.name().contains("my-name")) + } + + @Test + def testMapValuesWithValueMapperWithMaterialized(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val stateStore = "store" + val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore) + + val table = builder.stream[String, String](sourceTopic).toTable + table.mapValues(value => value.length.toLong, materialized) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + + testInput.pipeInput("1", "topic1value1") + assertEquals(12, testDriver.getKeyValueStore[String, Long](stateStore).get("1")) + + testDriver.close() + } + + @Test + def testMapValuesWithValueMapperWithKeyAndWithMaterialized(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val stateStore = "store" + val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore) + + val table = builder.stream[String, String](sourceTopic).toTable + table.mapValues((key, value) => key.length + value.length.toLong, materialized) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + + testInput.pipeInput("1", "topic1value1") + 
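+ // key "1" has length 1 and value "topic1value1" has length 12, so the mapped value stored is 13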
assertEquals(13, testDriver.getKeyValueStore[String, Long](stateStore).get("1")) + + testDriver.close() + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala new file mode 100644 index 0000000000000..9e0c466c26da0 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.internals.MaterializedInternal +import org.apache.kafka.streams.scala._ +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.state.Stores +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +import java.time.Duration + +class MaterializedTest { + + @Test + def testCreateMaterializedWithSerdes(): Unit = { + val materialized: Materialized[String, Long, ByteArrayKeyValueStore] = + Materialized.`with`[String, Long, ByteArrayKeyValueStore] + + val internalMaterialized = new MaterializedInternal(materialized) + assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) + } + + @Test + def testCreateMaterializedWithSerdesAndStoreName(): Unit = { + val storeName = "store" + val materialized: Materialized[String, Long, ByteArrayKeyValueStore] = + Materialized.as[String, Long, ByteArrayKeyValueStore](storeName) + + val internalMaterialized = new MaterializedInternal(materialized) + assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) + assertEquals(storeName, internalMaterialized.storeName) + } + + @Test + def testCreateMaterializedWithSerdesAndWindowStoreSupplier(): Unit = { + val storeSupplier = Stores.persistentWindowStore("store", Duration.ofMillis(1), Duration.ofMillis(1), true) + val materialized: Materialized[String, Long, ByteArrayWindowStore] = + Materialized.as[String, Long](storeSupplier) + + val internalMaterialized = new MaterializedInternal(materialized) + assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) + assertEquals(storeSupplier, internalMaterialized.storeSupplier) + } + + @Test + def testCreateMaterializedWithSerdesAndKeyValueStoreSupplier(): Unit = { + val storeSupplier = Stores.persistentKeyValueStore("store") + val materialized: 
Materialized[String, Long, ByteArrayKeyValueStore] = + Materialized.as[String, Long](storeSupplier) + + val internalMaterialized = new MaterializedInternal(materialized) + assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) + assertEquals(storeSupplier, internalMaterialized.storeSupplier) + } + + @Test + def testCreateMaterializedWithSerdesAndSessionStoreSupplier(): Unit = { + val storeSupplier = Stores.persistentSessionStore("store", Duration.ofMillis(1)) + val materialized: Materialized[String, Long, ByteArraySessionStore] = + Materialized.as[String, Long](storeSupplier) + + val internalMaterialized = new MaterializedInternal(materialized) + assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) + assertEquals(storeSupplier, internalMaterialized.storeSupplier) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala new file mode 100644 index 0000000000000..69c4b1706096f --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.internals.ProducedInternal +import org.apache.kafka.streams.processor.StreamPartitioner +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class ProducedTest { + + @Test + def testCreateProducedWithSerdes(): Unit = { + val produced: Produced[String, Long] = Produced.`with`[String, Long] + + val internalProduced = new ProducedInternal(produced) + assertEquals(Serdes.stringSerde.getClass, internalProduced.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalProduced.valueSerde.getClass) + } + + @Test + def testCreateProducedWithSerdesAndStreamPartitioner(): Unit = { + val partitioner = new StreamPartitioner[String, Long] { + override def partition(topic: String, key: String, value: Long, numPartitions: Int): Integer = 0 + } + val produced: Produced[String, Long] = Produced.`with`(partitioner) + + val internalProduced = new ProducedInternal(produced) + assertEquals(Serdes.stringSerde.getClass, internalProduced.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalProduced.valueSerde.getClass) + assertEquals(partitioner, internalProduced.streamPartitioner) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala new file mode 100644 index 0000000000000..4c8d8951b0af6 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.internals.RepartitionedInternal +import org.apache.kafka.streams.processor.StreamPartitioner +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class RepartitionedTest { + + @Test + def testCreateRepartitionedWithSerdes(): Unit = { + val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long] + + val internalRepartitioned = new RepartitionedInternal(repartitioned) + assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) + } + + @Test + def testCreateRepartitionedWithSerdesAndNumPartitions(): Unit = { + val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long](5) + + val internalRepartitioned = new RepartitionedInternal(repartitioned) + assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) + assertEquals(5, internalRepartitioned.numberOfPartitions) + } + + @Test + def testCreateRepartitionedWithSerdesAndTopicName(): Unit = { + val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long]("repartitionTopic") + + val internalRepartitioned = new RepartitionedInternal(repartitioned) + assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) + assertEquals("repartitionTopic", internalRepartitioned.name) + } + + @Test + def testCreateRepartitionedWithSerdesAndStreamPartitioner(): Unit = { + val partitioner = new StreamPartitioner[String, Long] { + override def partition(topic: String, key: String, value: Long, numPartitions: Int): Integer = 0 + } + val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long](partitioner) + + val internalRepartitioned = new RepartitionedInternal(repartitioned) + assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) + assertEquals(partitioner, internalRepartitioned.streamPartitioner) + } + + @Test + def testCreateRepartitionedWithTopicNameAndNumPartitionsAndStreamPartitioner(): Unit = { + val partitioner = new StreamPartitioner[String, Long] { + override def partition(topic: String, key: String, value: Long, numPartitions: Int): Integer = 0 + } + val repartitioned: Repartitioned[String, Long] = + Repartitioned + .`with`[String, Long](5) + .withName("repartitionTopic") + .withStreamPartitioner(partitioner) + + val internalRepartitioned = new RepartitionedInternal(repartitioned) + assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) + assertEquals(5, internalRepartitioned.numberOfPartitions) + assertEquals("repartitionTopic", internalRepartitioned.name) + assertEquals(partitioner, internalRepartitioned.streamPartitioner) + } + +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala new
file mode 100644 index 0000000000000..0717d05604b89 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.internals.StreamJoinedInternal +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.state.Stores +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +import java.time.Duration + +class StreamJoinedTest { + + @Test + def testCreateStreamJoinedWithSerdes(): Unit = { + val streamJoined: StreamJoined[String, String, Long] = StreamJoined.`with`[String, String, Long] + + val streamJoinedInternal = new StreamJoinedInternal[String, String, Long](streamJoined) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.keySerde().getClass) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.valueSerde().getClass) + assertEquals(Serdes.longSerde.getClass, streamJoinedInternal.otherValueSerde().getClass) + } + + @Test + def testCreateStreamJoinedWithSerdesAndStoreSuppliers(): Unit = { + val storeSupplier = Stores.inMemoryWindowStore("myStore", Duration.ofMillis(500), Duration.ofMillis(250), false) + + val otherStoreSupplier = + Stores.inMemoryWindowStore("otherStore", Duration.ofMillis(500), Duration.ofMillis(250), false) + + val streamJoined: StreamJoined[String, String, Long] = + StreamJoined.`with`[String, String, Long](storeSupplier, otherStoreSupplier) + + val streamJoinedInternal = new StreamJoinedInternal[String, String, Long](streamJoined) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.keySerde().getClass) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.valueSerde().getClass) + assertEquals(Serdes.longSerde.getClass, streamJoinedInternal.otherValueSerde().getClass) + assertEquals(otherStoreSupplier, streamJoinedInternal.otherStoreSupplier()) + assertEquals(storeSupplier, streamJoinedInternal.thisStoreSupplier()) + } + + @Test + def testCreateStreamJoinedWithSerdesAndStateStoreName(): Unit = { + val streamJoined: StreamJoined[String, String, Long] = StreamJoined.as[String, String, Long]("myStoreName") + + val streamJoinedInternal = new StreamJoinedInternal[String, String, Long](streamJoined) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.keySerde().getClass) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.valueSerde().getClass) + assertEquals(Serdes.longSerde.getClass, streamJoinedInternal.otherValueSerde().getClass) + assertEquals("myStoreName", streamJoinedInternal.storeName()) + } + +} diff --git 
a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/StreamToTableJoinScalaIntegrationTestBase.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/StreamToTableJoinScalaIntegrationTestBase.scala new file mode 100644 index 0000000000000..984cb74a6e2fd --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/StreamToTableJoinScalaIntegrationTestBase.scala @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.utils + +import java.util.Properties +import org.apache.kafka.clients.consumer.ConsumerConfig +import org.apache.kafka.clients.producer.ProducerConfig +import org.apache.kafka.common.serialization._ +import org.apache.kafka.common.utils.{MockTime, Utils} +import org.apache.kafka.streams._ +import org.apache.kafka.streams.integration.utils.{EmbeddedKafkaCluster, IntegrationTestUtils} +import org.apache.kafka.test.TestUtils +import org.junit.jupiter.api._ + +import java.io.File + +/** + * Test suite base that prepares Kafka cluster for stream-table joins in Kafka Streams + *

+ */ +@Tag("integration") +class StreamToTableJoinScalaIntegrationTestBase extends StreamToTableJoinTestData { + + private val cluster: EmbeddedKafkaCluster = new EmbeddedKafkaCluster(1) + + final private val alignedTime = (System.currentTimeMillis() / 1000 + 1) * 1000 + private val mockTime: MockTime = cluster.time + mockTime.setCurrentTimeMs(alignedTime) + + private val testFolder: File = TestUtils.tempDirectory() + + @BeforeEach + def startKafkaCluster(): Unit = { + cluster.start() + cluster.createTopic(userClicksTopic) + cluster.createTopic(userRegionsTopic) + cluster.createTopic(outputTopic) + cluster.createTopic(userClicksTopicJ) + cluster.createTopic(userRegionsTopicJ) + cluster.createTopic(outputTopicJ) + } + + @AfterEach + def stopKafkaCluster(): Unit = { + cluster.stop() + Utils.delete(testFolder) + } + + def getStreamsConfiguration(): Properties = { + val streamsConfiguration: Properties = new Properties() + + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "stream-table-join-scala-integration-test") + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "1000") + streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, testFolder.getPath) + + streamsConfiguration + } + + private def getUserRegionsProducerConfig(): Properties = { + val p = new Properties() + p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + p.put(ProducerConfig.ACKS_CONFIG, "all") + p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) + p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) + p + } + + private def getUserClicksProducerConfig(): Properties = { + val p = new Properties() + p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + p.put(ProducerConfig.ACKS_CONFIG, "all") + p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) + p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[LongSerializer]) + p + } + + private def getConsumerConfig(): Properties = { + val p = new Properties() + p.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + p.put(ConsumerConfig.GROUP_ID_CONFIG, "join-scala-integration-test-standard-consumer") + p.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") + p.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer]) + p.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[LongDeserializer]) + p + } + + def produceNConsume( + userClicksTopic: String, + userRegionsTopic: String, + outputTopic: String, + waitTillRecordsReceived: Boolean = true + ): java.util.List[KeyValue[String, Long]] = { + + import _root_.scala.jdk.CollectionConverters._ + + // Publish user-region information. + val userRegionsProducerConfig: Properties = getUserRegionsProducerConfig() + IntegrationTestUtils.produceKeyValuesSynchronously( + userRegionsTopic, + userRegions.asJava, + userRegionsProducerConfig, + mockTime, + false + ) + + // Publish user-click information. 
+ val userClicksProducerConfig: Properties = getUserClicksProducerConfig() + IntegrationTestUtils.produceKeyValuesSynchronously( + userClicksTopic, + userClicks.asJava, + userClicksProducerConfig, + mockTime, + false + ) + + if (waitTillRecordsReceived) { + // consume and verify result + val consumerConfig = getConsumerConfig() + + IntegrationTestUtils.waitUntilFinalKeyValueRecordsReceived( + consumerConfig, + outputTopic, + expectedClicksPerRegion.asJava + ) + } else { + java.util.Collections.emptyList() + } + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/StreamToTableJoinTestData.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/StreamToTableJoinTestData.scala new file mode 100644 index 0000000000000..29d06953e75a8 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/StreamToTableJoinTestData.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.utils + +import org.apache.kafka.streams.KeyValue + +trait StreamToTableJoinTestData { + val brokers = "localhost:9092" + + val userClicksTopic = s"user-clicks" + val userRegionsTopic = s"user-regions" + val outputTopic = s"output-topic" + + val userClicksTopicJ = s"user-clicks-j" + val userRegionsTopicJ = s"user-regions-j" + val outputTopicJ = s"output-topic-j" + + // Input 1: Clicks per user (multiple records allowed per user). + val userClicks: Seq[KeyValue[String, Long]] = Seq( + new KeyValue("alice", 13L), + new KeyValue("bob", 4L), + new KeyValue("chao", 25L), + new KeyValue("bob", 19L), + new KeyValue("dave", 56L), + new KeyValue("eve", 78L), + new KeyValue("alice", 40L), + new KeyValue("fang", 99L) + ) + + // Input 2: Region per user (multiple records allowed per user). + val userRegions: Seq[KeyValue[String, String]] = Seq( + new KeyValue("alice", "asia"), /* Alice lived in Asia originally... */ + new KeyValue("bob", "americas"), + new KeyValue("chao", "asia"), + new KeyValue("dave", "europe"), + new KeyValue("alice", "europe"), /* ...but moved to Europe some time later. 
*/ + new KeyValue("eve", "americas"), + new KeyValue("fang", "asia") + ) + + val expectedClicksPerRegion: Seq[KeyValue[String, Long]] = Seq( + new KeyValue("americas", 101L), + new KeyValue("europe", 109L), + new KeyValue("asia", 124L) + ) +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala new file mode 100644 index 0000000000000..23a24178fb941 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.utils + +import java.time.Instant +import java.util.Properties + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.scala.StreamsBuilder +import org.apache.kafka.streams.{StreamsConfig, TestInputTopic, TestOutputTopic, TopologyTestDriver} +import org.apache.kafka.test.TestUtils + +trait TestDriver { + def createTestDriver(builder: StreamsBuilder, initialWallClockTime: Instant = Instant.now()): TopologyTestDriver = { + val config = new Properties() + config.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath) + new TopologyTestDriver(builder.build(), config, initialWallClockTime) + } + + implicit class TopologyTestDriverOps(inner: TopologyTestDriver) { + def createInput[K, V](topic: String)(implicit serdeKey: Serde[K], serdeValue: Serde[V]): TestInputTopic[K, V] = + inner.createInputTopic(topic, serdeKey.serializer, serdeValue.serializer) + + def createOutput[K, V](topic: String)(implicit serdeKey: Serde[K], serdeValue: Serde[V]): TestOutputTopic[K, V] = + inner.createOutputTopic(topic, serdeKey.deserializer, serdeValue.deserializer) + } +} diff --git a/streams/test-utils/bin/test/log4j.properties b/streams/test-utils/bin/test/log4j.properties new file mode 100644 index 0000000000000..be36f90299a77 --- /dev/null +++ b/streams/test-utils/bin/test/log4j.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +log4j.rootLogger=INFO, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n + +log4j.logger.org.apache.kafka=INFO