Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issues found after integration tests #79

Merged
merged 29 commits into from
Mar 25, 2025
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.sneaksanddata.arcane.framework
package services.streaming.base

import services.mssql.MsSqlConnection.BackfillBatch

import com.sneaksanddata.arcane.framework.services.consumers.{StagedBackfillBatch, StagedBackfillOverwriteBatch}
import zio.Task

/**
* A trait that represents a backfill data provider.
*/
/**
 * Supplies the data for a backfill (full reload) stream run.
 */
trait BackfillStreamingDataProvider {

  /**
   * Requests the complete backfill data set for the stream.
   *
   * @return A task that yields the staged backfill overwrite batch.
   */
  def requestBackfill: Task[StagedBackfillOverwriteBatch]
}
2 changes: 2 additions & 0 deletions src/main/scala/services/streaming/base/BatchProcessor.scala
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
package com.sneaksanddata.arcane.framework
package services.streaming.base

import org.apache.hc.core5.annotation.Obsolete
import zio.stream.ZPipeline

/**
* A trait that represents a batch processor.
* @tparam IncomingType The type of the incoming data.
*/
@Obsolete
trait BatchProcessor[IncomingType, OutgoingType] {

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import models.app.StreamContext
import models.settings.{BackfillBehavior, BackfillSettings}
import services.streaming.base.StreamingGraphBuilder
import services.streaming.graph_builders.backfill.{GenericBackfillMergeGraphBuilder, GenericBackfillOverwriteGraphBuilder}
import services.streaming.graph_builders.base.GenericStreamingGraphBuilder

import zio.{ZIO, ZLayer}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
package com.sneaksanddata.arcane.framework
package services.streaming.graph_builders.base
package services.streaming.graph_builders

import services.app.base.StreamLifetimeService
import services.streaming.base.{HookManager, StreamDataProvider, StreamingGraphBuilder}
import services.streaming.processors.GenericGroupingTransformer
import services.streaming.processors.batch_processors.streaming.{DisposeBatchProcessor, MergeBatchProcessor}
import services.streaming.processors.batch_processors.BackfillApplyBatchProcessor
import services.streaming.processors.transformers.{FieldFilteringTransformer, StagingProcessor}

import zio.stream.ZStream
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package com.sneaksanddata.arcane.framework
package services.streaming.processors.batch_processors

import logging.ZIOLogAnnotations.*
import models.settings.*
import services.base.MergeServiceClient
import services.merging.JdbcTableManager
import services.streaming.base.{BatchProcessor, OptimizationRequestConvertable, OrphanFilesExpirationRequestConvertable, SnapshotExpirationRequestConvertable, StagedBatchProcessor, StreamingBatchProcessor}

import com.sneaksanddata.arcane.framework.services.consumers.{MergeableBatch, StagedBackfillBatch, StagedBackfillOverwriteBatch}
import com.sneaksanddata.arcane.framework.services.streaming.processors.transformers.IndexedStagedBatches
import zio.stream.ZPipeline
import zio.{ZIO, ZLayer}

/**
* The streaming batch processor that processes the Backfill batches produced by the backfill data provider running in
* the backfill mode with the backfill behavior set to overwrite.
*/
/**
 * The streaming batch processor that processes the Backfill batches produced by the backfill data provider running in
 * the backfill mode with the backfill behavior set to overwrite.
 */
class BackfillApplyBatchProcessor(mergeServiceClient: MergeServiceClient, tableManager: JdbcTableManager, targetTableSettings: TargetTableSettings)
  extends StreamingBatchProcessor:

  override type BatchType = StagedBackfillOverwriteBatch

  /**
   * Processes the incoming data.
   *
   * @return ZPipeline (stream source for the stream graph).
   */
  override def process: ZPipeline[Any, Throwable, BatchType, BatchType] =
    ZPipeline.mapZIO { batch =>
      // Log, align the target schema, then apply the batch; pass the batch downstream unchanged.
      val applyToTarget =
        zlog(s"Applying backfill batch to ${batch.targetTableName}") *>
          tableManager.migrateSchema(batch.schema, batch.targetTableName) *>
          mergeServiceClient.applyBatch(batch)
      applyToTarget.as(batch)
    }

object BackfillApplyBatchProcessor:

  /**
   * Factory method to create a BackfillApplyBatchProcessor.
   *
   * @param mergeServiceClient The merge service client used to apply batches to the target table.
   * @param tableManager The table manager used to migrate the target table schema.
   * @param targetTableSettings The target table settings.
   * @return The initialized BackfillApplyBatchProcessor instance
   */
  def apply(mergeServiceClient: MergeServiceClient, tableManager: JdbcTableManager, targetTableSettings: TargetTableSettings): BackfillApplyBatchProcessor =
    new BackfillApplyBatchProcessor(mergeServiceClient, tableManager, targetTableSettings)

  /**
   * The required environment for the BackfillApplyBatchProcessor.
   */
  type Environment = MergeServiceClient & JdbcTableManager & TargetTableSettings

  /**
   * The ZLayer that creates the BackfillApplyBatchProcessor.
   */
  val layer: ZLayer[Environment, Nothing, BackfillApplyBatchProcessor] =
    ZLayer {
      // Name the services for what they are; the previous names (jdbcConsumer,
      // parallelismSettings) were copied from another processor and misleading.
      for
        mergeServiceClient <- ZIO.service[MergeServiceClient]
        tableManager <- ZIO.service[JdbcTableManager]
        targetTableSettings <- ZIO.service[TargetTableSettings]
      yield BackfillApplyBatchProcessor(mergeServiceClient, tableManager, targetTableSettings)
    }
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,18 @@ import services.consumers.SqlServerChangeTrackingMergeBatch
import services.filters.FieldsFilteringService
import services.lakehouse.base.CatalogWriter
import services.merging.JdbcTableManager
import services.streaming.base.{HookManager, StreamDataProvider}
import services.streaming.graph_builders.base.GenericStreamingGraphBuilder
import services.streaming.base.{BackfillDataProvider, BackfillStreamingDataProvider, HookManager, StreamDataProvider}
import services.streaming.processors.GenericGroupingTransformer
import services.streaming.processors.batch_processors.streaming.{DisposeBatchProcessor, MergeBatchProcessor}
import services.streaming.processors.batch_processors.BackfillApplyBatchProcessor
import services.streaming.processors.transformers.FieldFilteringTransformer.Environment
import services.streaming.processors.transformers.{FieldFilteringTransformer, StagingProcessor}
import services.streaming.processors.utils.TestIndexedStagedBatches
import utils.*

import com.sneaksanddata.arcane.framework.models.app.StreamContext
import com.sneaksanddata.arcane.framework.services.streaming.graph_builders.GenericStreamingGraphBuilder
import com.sneaksanddata.arcane.framework.services.streaming.graph_builders.backfill.GenericBackfillOverwriteGraphBuilder
import org.apache.iceberg.rest.RESTCatalog
import org.apache.iceberg.{Schema, Table}
import org.easymock.EasyMock
Expand Down Expand Up @@ -47,6 +49,7 @@ class GenericStreamRunnerServiceTests extends AsyncFlatSpec with Matchers with E
val jdbcTableManager = mock[JdbcTableManager]
val hookManager = mock[HookManager]
val streamDataProvider = mock[StreamDataProvider]
val backfillDataProvider = mock[BackfillStreamingDataProvider]

val catalogWriter = mock[CatalogWriter[RESTCatalog, Table, Schema]]
val tableMock = mock[Table]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import com.sneaksanddata.arcane.framework.models.settings.{BackfillBehavior, Bac
import com.sneaksanddata.arcane.framework.services.app.base.StreamLifetimeService
import com.sneaksanddata.arcane.framework.services.streaming.base.{BackfillStreamingMergeDataProvider, BackfillStreamingOverwriteDataProvider, HookManager, StreamDataProvider, StreamingGraphBuilder}
import com.sneaksanddata.arcane.framework.services.streaming.graph_builders.backfill.{GenericBackfillMergeGraphBuilder, GenericBackfillOverwriteGraphBuilder}
import com.sneaksanddata.arcane.framework.services.streaming.graph_builders.base.GenericStreamingGraphBuilder
import com.sneaksanddata.arcane.framework.services.streaming.graph_builders.GenericStreamingGraphBuilder
import com.sneaksanddata.arcane.framework.services.streaming.processors.GenericGroupingTransformer
import com.sneaksanddata.arcane.framework.services.streaming.processors.batch_processors.backfill.BackfillApplyBatchProcessor
import com.sneaksanddata.arcane.framework.services.streaming.processors.batch_processors.streaming.{DisposeBatchProcessor, MergeBatchProcessor}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ import services.consumers.SynapseLinkMergeBatch
import services.merging.JdbcTableManager
import services.streaming.processors.batch_processors.streaming.MergeBatchProcessor
import services.streaming.processors.utils.TestIndexedStagedBatches
import services.merging.models.{JdbcOptimizationRequest, JdbcOrphanFilesExpirationRequest, JdbcSnapshotExpirationRequest}
import services.streaming.base.{OptimizationRequestConvertable, OrphanFilesExpirationRequestConvertable, SnapshotExpirationRequestConvertable}
import services.streaming.processors.batch_processors.BackfillApplyBatchProcessor
import services.streaming.processors.transformers.IndexedStagedBatches

import com.sneaksanddata.arcane.framework.utils.{TablePropertiesSettings, TestTargetTableSettings, TestTargetTableSettingsWithMaintenance}

import org.easymock.EasyMock
Expand Down
Loading