Skip to content

Commit c8f939e

Browse files
authored
Merge pull request #102 from mlitvk/cdc_with_tablets
support CDC with tablets
2 parents 215f90a + c53bf33 commit c8f939e

File tree

27 files changed

+2050
-193
lines changed

27 files changed

+2050
-193
lines changed

scylla-cdc-base/src/main/java/com/scylladb/cdc/cql/BaseMasterCQL.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,18 @@
1313
import com.scylladb.cdc.model.StreamId;
1414
import com.scylladb.cdc.model.Timestamp;
1515
import com.scylladb.cdc.model.master.GenerationMetadata;
16+
import com.scylladb.cdc.model.TableName;
1617

1718
public abstract class BaseMasterCQL implements MasterCQL {
1819

1920
protected abstract CompletableFuture<Optional<Date>> fetchSmallestGenerationAfter(Date after);
2021

2122
protected abstract CompletableFuture<Set<ByteBuffer>> fetchStreamsForGeneration(Date generationStart);
2223

24+
protected abstract CompletableFuture<Optional<Date>> fetchSmallestTableGenerationAfter(TableName table, Date after);
25+
26+
protected abstract CompletableFuture<Set<ByteBuffer>> fetchStreamsForTableGeneration(TableName table, Date generationStart);
27+
2328
@Override
2429
public CompletableFuture<Optional<GenerationId>> fetchFirstGenerationId() {
2530
return fetchSmallestGenerationAfter(new Date(0))
@@ -43,4 +48,27 @@ public CompletableFuture<Optional<Timestamp>> fetchGenerationEnd(GenerationId id
4348
return fetchSmallestGenerationAfter(id.getGenerationStart().toDate()).thenApply(opt -> opt.map(Timestamp::new));
4449
}
4550

51+
@Override
52+
public CompletableFuture<GenerationId> fetchFirstTableGenerationId(TableName table) {
53+
return fetchSmallestTableGenerationAfter(table, new Date(0))
54+
.thenApply(opt -> opt.map(t -> new GenerationId(new Timestamp(t)))
55+
.orElseThrow(() -> new IllegalStateException("No generation found for table: " + table)));
56+
}
57+
58+
@Override
59+
public CompletableFuture<GenerationMetadata> fetchTableGenerationMetadata(TableName table, GenerationId generationId) {
60+
CompletableFuture<Optional<Timestamp>> endFut = fetchTableGenerationEnd(table, generationId);
61+
CompletableFuture<Set<ByteBuffer>> streamsFut = fetchStreamsForTableGeneration(table, generationId.getGenerationStart().toDate());
62+
63+
return endFut.thenCombine(streamsFut, (end, streams) -> {
64+
return new GenerationMetadata(generationId.getGenerationStart(), end, convertStreams(streams));
65+
});
66+
}
67+
68+
@Override
69+
public CompletableFuture<Optional<Timestamp>> fetchTableGenerationEnd(TableName table, GenerationId generationId) {
70+
return fetchSmallestTableGenerationAfter(table, generationId.getGenerationStart().toDate())
71+
.thenApply(opt -> opt.map(t -> new Timestamp(t)));
72+
}
73+
4674
}

scylla-cdc-base/src/main/java/com/scylladb/cdc/cql/MasterCQL.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,18 @@
99
import com.scylladb.cdc.model.master.GenerationMetadata;
1010

1111
public interface MasterCQL {
12+
13+
CompletableFuture<Optional<Long>> fetchTableTTL(TableName tableName);
14+
CompletableFuture<Optional<Throwable>> validateTable(TableName table);
15+
Boolean usesTablets(TableName tableName);
16+
17+
// Vnode-based CDC methods
1218
CompletableFuture<Optional<GenerationId>> fetchFirstGenerationId();
1319
CompletableFuture<GenerationMetadata> fetchGenerationMetadata(GenerationId id);
1420
CompletableFuture<Optional<Timestamp>> fetchGenerationEnd(GenerationId id);
15-
CompletableFuture<Optional<Long>> fetchTableTTL(TableName tableName);
16-
CompletableFuture<Optional<Throwable>> validateTable(TableName table);
21+
22+
// Tablet-based CDC methods
23+
CompletableFuture<GenerationId> fetchFirstTableGenerationId(TableName table);
24+
CompletableFuture<GenerationMetadata> fetchTableGenerationMetadata(TableName table, GenerationId generationId);
25+
CompletableFuture<Optional<Timestamp>> fetchTableGenerationEnd(TableName table, GenerationId generationId);
1726
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package com.scylladb.cdc.model.master;
2+
3+
import java.util.concurrent.ExecutionException;
4+
5+
/**
6+
* Abstraction for CDC metadata model.
7+
* Implementations: Generation-based (vnode) and Tablet-based.
8+
*/
9+
public interface CDCMetadataModel {
10+
11+
/**
12+
* Runs the master loop until an exception occurs or the thread is interrupted.
13+
*/
14+
void runMasterLoop() throws InterruptedException, ExecutionException;
15+
16+
}
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
package com.scylladb.cdc.model.master;
2+
3+
import java.util.ArrayList;
4+
import java.util.Comparator;
5+
import java.util.Date;
6+
import java.util.HashMap;
7+
import java.util.List;
8+
import java.util.Map;
9+
import java.util.Optional;
10+
import java.util.Set;
11+
import java.util.SortedSet;
12+
import java.util.TreeSet;
13+
import java.util.concurrent.ExecutionException;
14+
15+
import com.google.common.base.Preconditions;
16+
import com.google.common.flogger.FluentLogger;
17+
import com.scylladb.cdc.model.GenerationId;
18+
import com.scylladb.cdc.model.StreamId;
19+
import com.scylladb.cdc.model.TableName;
20+
import com.scylladb.cdc.model.TaskId;
21+
import com.scylladb.cdc.model.Timestamp;
22+
import com.scylladb.cdc.transport.GroupedTasks;
23+
24+
public class GenerationBasedCDCMetadataModel implements CDCMetadataModel {
25+
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
26+
27+
private final MasterConfiguration masterConfiguration;
28+
29+
public GenerationBasedCDCMetadataModel(MasterConfiguration masterConfiguration) {
30+
this.masterConfiguration = Preconditions.checkNotNull(masterConfiguration);
31+
}
32+
33+
private GenerationId getGenerationId() throws InterruptedException, ExecutionException {
34+
Optional<GenerationId> generationId = masterConfiguration.transport.getCurrentGenerationId();
35+
if (generationId.isPresent()) {
36+
return generationId.get();
37+
}
38+
while (true) {
39+
generationId = masterConfiguration.cql.fetchFirstGenerationId().get();
40+
if (generationId.isPresent()) {
41+
return generationId.get();
42+
}
43+
Thread.sleep(masterConfiguration.sleepBeforeFirstGenerationMs);
44+
}
45+
}
46+
47+
private boolean generationDone(GenerationMetadata generation, Set<TaskId> tasks) throws ExecutionException, InterruptedException {
48+
if (!generation.isClosed()) {
49+
return false;
50+
}
51+
52+
if (generationTTLExpired(generation)) {
53+
return true;
54+
}
55+
56+
return masterConfiguration.transport.areTasksFullyConsumedUntil(tasks, generation.getEnd().get());
57+
}
58+
59+
private boolean generationTTLExpired(GenerationMetadata generation) throws ExecutionException, InterruptedException {
60+
// Check the CDC tables TTL values.
61+
//
62+
// By default the TTL value is relatively
63+
// small (24 hours), which means that we
64+
// could safely skip some older generations
65+
// (the changes in them have already
66+
// expired).
67+
Date now = Date.from(masterConfiguration.clock.instant());
68+
List<Optional<Long>> tablesTTL = new ArrayList<>();
69+
for (TableName table : masterConfiguration.tables) {
70+
// In case fetching the TTL value was unsuccessful,
71+
// assume that no TTL is set on a table. This way
72+
// the generation will not expire. By "catching"
73+
// the exception here, one "bad" table will
74+
// not disturb the entire master process.
75+
Optional<Long> ttl = masterConfiguration.cql.fetchTableTTL(table).exceptionally(ex -> {
76+
logger.atSevere().withCause(ex).log("Error while fetching TTL " +
77+
"value for table %s.%s", table.keyspace, table.name);
78+
return Optional.empty();
79+
}).get();
80+
tablesTTL.add(ttl);
81+
}
82+
83+
// If tablesTTL is empty or contains a table with TTL disabled,
84+
// use new Date(0) value - meaning there is no lower bound
85+
// of row timestamps the table could possibly contain.
86+
Date lastVisibleChanges = tablesTTL.stream()
87+
// getTime() is in milliseconds, TTL is in seconds
88+
.map(t -> t.map(ttl -> new Date(now.getTime() - 1000L * ttl)).orElse(new Date(0)))
89+
.min(Comparator.naturalOrder())
90+
.orElse(new Date(0));
91+
92+
return lastVisibleChanges.after(generation.getEnd().get().toDate());
93+
}
94+
95+
private GenerationMetadata getNextGeneration(GenerationMetadata generation)
96+
throws InterruptedException, ExecutionException {
97+
return masterConfiguration.cql.fetchGenerationMetadata(generation.getNextGenerationId().get()).get();
98+
}
99+
100+
private GroupedTasks createTasks(GenerationMetadata generation) {
101+
SortedSet<StreamId> streams = generation.getStreams();
102+
Map<TaskId, SortedSet<StreamId>> tasks = new HashMap<>();
103+
for (StreamId s : streams) {
104+
for (TableName t : masterConfiguration.tables) {
105+
TaskId taskId = new TaskId(generation.getId(), s.getVNodeId(), t);
106+
tasks.computeIfAbsent(taskId, id -> new TreeSet<>()).add(s);
107+
}
108+
}
109+
return new GroupedTasks(tasks, generation);
110+
}
111+
112+
private GenerationMetadata refreshEnd(GenerationMetadata generation)
113+
throws InterruptedException, ExecutionException {
114+
Optional<Timestamp> end = masterConfiguration.cql.fetchGenerationEnd(generation.getId()).get();
115+
return end.isPresent() ? generation.withEnd(end.get()) : generation;
116+
}
117+
118+
@Override
119+
public void runMasterLoop() throws InterruptedException, ExecutionException {
120+
GenerationId generationId = getGenerationId();
121+
GenerationMetadata generation = masterConfiguration.cql.fetchGenerationMetadata(generationId).get();
122+
GroupedTasks tasks = createTasks(generation);
123+
while (!Thread.currentThread().isInterrupted()) {
124+
while (generationDone(generation, tasks.getTaskIds())) {
125+
generation = getNextGeneration(generation);
126+
tasks = createTasks(generation);
127+
}
128+
129+
logger.atInfo().log("Master found a new generation: %s. Will call transport.configureWorkers().", generation.getId());
130+
tasks.getTasks().forEach((task, streams) ->
131+
logger.atFine().log("Created Task: %s with streams: %s", task, streams));
132+
133+
masterConfiguration.transport.configureWorkers(tasks);
134+
while (!generationDone(generation, tasks.getTaskIds())) {
135+
Thread.sleep(masterConfiguration.sleepBeforeGenerationDoneMs);
136+
if (!generation.isClosed()) {
137+
generation = refreshEnd(generation);
138+
}
139+
}
140+
}
141+
}
142+
}

0 commit comments

Comments
 (0)