Skip to content

Commit 248048a

Browse files
authored
fix: prevent potential thread starvation in SessionPool (#85)
* fix: prevent thread starvation in session pool * fix: prevent test from blocking on close * fix: wait for executor to shutdown
1 parent b096651 commit 248048a

File tree

3 files changed

+271
-13
lines changed

3 files changed

+271
-13
lines changed

google-cloud-spanner/src/main/java/com/google/cloud/spanner/SessionPool.java

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import com.google.common.util.concurrent.ListenableFuture;
5353
import com.google.common.util.concurrent.MoreExecutors;
5454
import com.google.common.util.concurrent.SettableFuture;
55+
import com.google.common.util.concurrent.ThreadFactoryBuilder;
5556
import com.google.common.util.concurrent.Uninterruptibles;
5657
import com.google.protobuf.Empty;
5758
import io.opencensus.common.Scope;
@@ -75,9 +76,11 @@
7576
import java.util.Queue;
7677
import java.util.Random;
7778
import java.util.Set;
79+
import java.util.concurrent.Executors;
7880
import java.util.concurrent.ScheduledExecutorService;
7981
import java.util.concurrent.ScheduledFuture;
8082
import java.util.concurrent.SynchronousQueue;
83+
import java.util.concurrent.ThreadPoolExecutor;
8184
import java.util.concurrent.TimeUnit;
8285
import java.util.concurrent.atomic.AtomicLong;
8386
import java.util.logging.Level;
@@ -1089,6 +1092,7 @@ private static enum Position {
10891092
private final SessionClient sessionClient;
10901093
private final ScheduledExecutorService executor;
10911094
private final ExecutorFactory<ScheduledExecutorService> executorFactory;
1095+
private final ScheduledExecutorService prepareExecutor;
10921096
final PoolMaintainer poolMaintainer;
10931097
private final Clock clock;
10941098
private final Object lock = new Object();
@@ -1209,6 +1213,19 @@ private SessionPool(
12091213
this.options = options;
12101214
this.executorFactory = executorFactory;
12111215
this.executor = executor;
1216+
int prepareThreads;
1217+
if (executor instanceof ThreadPoolExecutor) {
1218+
prepareThreads = Math.max(((ThreadPoolExecutor) executor).getCorePoolSize(), 1);
1219+
} else {
1220+
prepareThreads = 8;
1221+
}
1222+
this.prepareExecutor =
1223+
Executors.newScheduledThreadPool(
1224+
prepareThreads,
1225+
new ThreadFactoryBuilder()
1226+
.setDaemon(true)
1227+
.setNameFormat("session-pool-prepare-%d")
1228+
.build());
12121229
this.sessionClient = sessionClient;
12131230
this.clock = clock;
12141231
this.poolMaintainer = new PoolMaintainer();
@@ -1630,11 +1647,27 @@ ListenableFuture<Void> closeAsync() {
16301647
closureFuture = SettableFuture.create();
16311648
retFuture = closureFuture;
16321649
pendingClosure =
1633-
totalSessions() + numSessionsBeingCreated + 1 /* For pool maintenance thread */;
1650+
totalSessions()
1651+
+ numSessionsBeingCreated
1652+
+ 2 /* For pool maintenance thread + prepareExecutor */;
16341653

16351654
poolMaintainer.close();
16361655
readSessions.clear();
16371656
writePreparedSessions.clear();
1657+
prepareExecutor.shutdown();
1658+
executor.submit(
1659+
new Runnable() {
1660+
@Override
1661+
public void run() {
1662+
try {
1663+
prepareExecutor.awaitTermination(5L, TimeUnit.SECONDS);
1664+
} catch (Throwable t) {
1665+
}
1666+
synchronized (lock) {
1667+
decrementPendingClosures(1);
1668+
}
1669+
}
1670+
});
16381671
for (final PooledSession session : ImmutableList.copyOf(allSessions)) {
16391672
if (session.leakedException != null) {
16401673
logger.log(Level.WARNING, "Leaked session", session.leakedException);
@@ -1712,7 +1745,7 @@ private void prepareSession(final PooledSession sess) {
17121745
synchronized (lock) {
17131746
numSessionsBeingPrepared++;
17141747
}
1715-
executor.submit(
1748+
prepareExecutor.submit(
17161749
new Runnable() {
17171750
@Override
17181751
public void run() {

google-cloud-spanner/src/main/java/com/google/cloud/spanner/SpannerImpl.java

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
import java.util.List;
4040
import java.util.Map;
4141
import java.util.concurrent.ExecutionException;
42+
import java.util.concurrent.TimeUnit;
43+
import java.util.concurrent.TimeoutException;
4244
import java.util.logging.Level;
4345
import java.util.logging.Logger;
4446
import javax.annotation.Nullable;
@@ -172,6 +174,10 @@ public BatchClient getBatchClient(DatabaseId db) {
172174

173175
@Override
174176
public void close() {
177+
close(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
178+
}
179+
180+
void close(long timeout, TimeUnit unit) {
175181
List<ListenableFuture<Void>> closureFutures = null;
176182
synchronized (this) {
177183
Preconditions.checkState(!spannerIsClosed, "Cloud Spanner client has been closed");
@@ -184,18 +190,19 @@ public void close() {
184190
dbClients.clear();
185191
}
186192
try {
187-
Futures.successfulAsList(closureFutures).get();
188-
} catch (InterruptedException | ExecutionException e) {
193+
Futures.successfulAsList(closureFutures).get(timeout, unit);
194+
} catch (InterruptedException | ExecutionException | TimeoutException e) {
189195
throw SpannerExceptionFactory.newSpannerException(e);
190-
}
191-
for (SessionClient sessionClient : sessionClients.values()) {
192-
sessionClient.close();
193-
}
194-
sessionClients.clear();
195-
try {
196-
gapicRpc.shutdown();
197-
} catch (RuntimeException e) {
198-
logger.log(Level.WARNING, "Failed to close channels", e);
196+
} finally {
197+
for (SessionClient sessionClient : sessionClients.values()) {
198+
sessionClient.close();
199+
}
200+
sessionClients.clear();
201+
try {
202+
gapicRpc.shutdown();
203+
} catch (RuntimeException e) {
204+
logger.log(Level.WARNING, "Failed to close channels", e);
205+
}
199206
}
200207
}
201208

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.spanner;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
21+
import com.google.api.gax.grpc.testing.LocalChannelProvider;
22+
import com.google.cloud.NoCredentials;
23+
import com.google.cloud.grpc.GrpcTransportOptions;
24+
import com.google.cloud.grpc.GrpcTransportOptions.ExecutorFactory;
25+
import com.google.cloud.spanner.MockSpannerServiceImpl.SimulatedExecutionTime;
26+
import com.google.cloud.spanner.MockSpannerServiceImpl.StatementResult;
27+
import com.google.cloud.spanner.TransactionRunner.TransactionCallable;
28+
import com.google.protobuf.ListValue;
29+
import com.google.spanner.v1.ResultSetMetadata;
30+
import com.google.spanner.v1.StructType;
31+
import com.google.spanner.v1.StructType.Field;
32+
import com.google.spanner.v1.TypeCode;
33+
import io.grpc.Server;
34+
import io.grpc.Status;
35+
import io.grpc.inprocess.InProcessServerBuilder;
36+
import java.io.IOException;
37+
import java.util.concurrent.Executors;
38+
import java.util.concurrent.ScheduledExecutorService;
39+
import java.util.concurrent.ScheduledThreadPoolExecutor;
40+
import java.util.concurrent.TimeUnit;
41+
import org.junit.After;
42+
import org.junit.AfterClass;
43+
import org.junit.Before;
44+
import org.junit.BeforeClass;
45+
import org.junit.Test;
46+
import org.junit.runner.RunWith;
47+
import org.junit.runners.JUnit4;
48+
49+
/**
50+
* Tests that a degraded backend that can no longer create any new sessions will not cause an
51+
* application that already has a healthy session pool to stop functioning.
52+
*/
53+
@RunWith(JUnit4.class)
54+
public class BackendExhaustedTest {
55+
private static final String TEST_PROJECT = "my-project";
56+
private static final String TEST_INSTANCE = "my-instance";
57+
private static final String TEST_DATABASE = "my-database";
58+
private static MockSpannerServiceImpl mockSpanner;
59+
private static Server server;
60+
private static LocalChannelProvider channelProvider;
61+
private static final Statement UPDATE_STATEMENT =
62+
Statement.of("UPDATE FOO SET BAR=1 WHERE BAZ=2");
63+
private static final Statement INVALID_UPDATE_STATEMENT =
64+
Statement.of("UPDATE NON_EXISTENT_TABLE SET BAR=1 WHERE BAZ=2");
65+
private static final long UPDATE_COUNT = 1L;
66+
private static final Statement SELECT1 = Statement.of("SELECT 1 AS COL1");
67+
private static final ResultSetMetadata SELECT1_METADATA =
68+
ResultSetMetadata.newBuilder()
69+
.setRowType(
70+
StructType.newBuilder()
71+
.addFields(
72+
Field.newBuilder()
73+
.setName("COL1")
74+
.setType(
75+
com.google.spanner.v1.Type.newBuilder()
76+
.setCode(TypeCode.INT64)
77+
.build())
78+
.build())
79+
.build())
80+
.build();
81+
private static final com.google.spanner.v1.ResultSet SELECT1_RESULTSET =
82+
com.google.spanner.v1.ResultSet.newBuilder()
83+
.addRows(
84+
ListValue.newBuilder()
85+
.addValues(com.google.protobuf.Value.newBuilder().setStringValue("1").build())
86+
.build())
87+
.setMetadata(SELECT1_METADATA)
88+
.build();
89+
private Spanner spanner;
90+
private DatabaseClientImpl client;
91+
92+
@BeforeClass
93+
public static void startStaticServer() throws IOException {
94+
mockSpanner = new MockSpannerServiceImpl();
95+
mockSpanner.setAbortProbability(0.0D); // We don't want any unpredictable aborted transactions.
96+
mockSpanner.putStatementResult(StatementResult.update(UPDATE_STATEMENT, UPDATE_COUNT));
97+
mockSpanner.putStatementResult(StatementResult.query(SELECT1, SELECT1_RESULTSET));
98+
mockSpanner.putStatementResult(
99+
StatementResult.exception(
100+
INVALID_UPDATE_STATEMENT,
101+
Status.INVALID_ARGUMENT.withDescription("invalid statement").asRuntimeException()));
102+
103+
String uniqueName = InProcessServerBuilder.generateName();
104+
server =
105+
InProcessServerBuilder.forName(uniqueName)
106+
// We need to use a real executor for timeouts to occur.
107+
.scheduledExecutorService(new ScheduledThreadPoolExecutor(1))
108+
.addService(mockSpanner)
109+
.build()
110+
.start();
111+
channelProvider = LocalChannelProvider.create(uniqueName);
112+
}
113+
114+
@AfterClass
115+
public static void stopServer() throws InterruptedException {
116+
// Force a shutdown as there are still requests stuck in the server.
117+
server.shutdownNow();
118+
server.awaitTermination();
119+
}
120+
121+
@Before
122+
public void setUp() throws Exception {
123+
SpannerOptions options =
124+
SpannerOptions.newBuilder()
125+
.setProjectId(TEST_PROJECT)
126+
.setChannelProvider(channelProvider)
127+
.setCredentials(NoCredentials.getInstance())
128+
.build();
129+
ExecutorFactory<ScheduledExecutorService> executorFactory =
130+
((GrpcTransportOptions) options.getTransportOptions()).getExecutorFactory();
131+
ScheduledThreadPoolExecutor executor = (ScheduledThreadPoolExecutor) executorFactory.get();
132+
options =
133+
options
134+
.toBuilder()
135+
.setSessionPoolOption(
136+
SessionPoolOptions.newBuilder()
137+
.setMinSessions(executor.getCorePoolSize())
138+
.setMaxSessions(executor.getCorePoolSize() * 3)
139+
.setWriteSessionsFraction(0.0f)
140+
.build())
141+
.build();
142+
executorFactory.release(executor);
143+
144+
spanner = options.getService();
145+
client =
146+
(DatabaseClientImpl)
147+
spanner.getDatabaseClient(DatabaseId.of(TEST_PROJECT, TEST_INSTANCE, TEST_DATABASE));
148+
// Wait until the session pool has initialized.
149+
while (client.pool.getNumberOfSessionsInPool()
150+
< spanner.getOptions().getSessionPoolOptions().getMinSessions()) {
151+
Thread.sleep(1L);
152+
}
153+
}
154+
155+
@After
156+
public void tearDown() throws Exception {
157+
mockSpanner.reset();
158+
mockSpanner.removeAllExecutionTimes();
159+
// This test case force-closes the Spanner instance as it would otherwise wait
160+
// forever on the BatchCreateSessions requests that are 'stuck'.
161+
try {
162+
((SpannerImpl) spanner).close(100L, TimeUnit.MILLISECONDS);
163+
} catch (SpannerException e) {
164+
// ignore any errors during close as they are expected.
165+
}
166+
}
167+
168+
@Test
169+
public void test() throws Exception {
170+
// Simulate very heavy load on the server by effectively stopping session creation.
171+
mockSpanner.setBatchCreateSessionsExecutionTime(
172+
SimulatedExecutionTime.ofMinimumAndRandomTime(Integer.MAX_VALUE, 0));
173+
// Create an executor that can handle twice as many requests as the minimum number of sessions
174+
// in the pool and then start that many read requests. That will initiate the creation of
175+
// additional sessions.
176+
ScheduledExecutorService executor =
177+
Executors.newScheduledThreadPool(
178+
spanner.getOptions().getSessionPoolOptions().getMinSessions() * 2);
179+
// Also temporarily freeze the server to ensure that the requests that can be served will
180+
// continue to be in-flight and keep the sessions in the pool checked out.
181+
mockSpanner.freeze();
182+
for (int i = 0; i < spanner.getOptions().getSessionPoolOptions().getMinSessions() * 2; i++) {
183+
executor.submit(new ReadRunnable());
184+
}
185+
// Now schedule as many write requests as there can be sessions in the pool.
186+
for (int i = 0; i < spanner.getOptions().getSessionPoolOptions().getMaxSessions(); i++) {
187+
executor.submit(new WriteRunnable());
188+
}
189+
// Now unfreeze the server and verify that all requests can be served using the sessions that
190+
// were already present in the pool.
191+
mockSpanner.unfreeze();
192+
executor.shutdown();
193+
assertThat(executor.awaitTermination(10, TimeUnit.SECONDS)).isTrue();
194+
}
195+
196+
private final class ReadRunnable implements Runnable {
197+
@Override
198+
public void run() {
199+
try (ResultSet rs = client.singleUse().executeQuery(SELECT1)) {
200+
while (rs.next()) {}
201+
}
202+
}
203+
}
204+
205+
private final class WriteRunnable implements Runnable {
206+
@Override
207+
public void run() {
208+
TransactionRunner runner = client.readWriteTransaction();
209+
runner.run(
210+
new TransactionCallable<Long>() {
211+
@Override
212+
public Long run(TransactionContext transaction) throws Exception {
213+
return transaction.executeUpdate(UPDATE_STATEMENT);
214+
}
215+
});
216+
}
217+
}
218+
}

0 commit comments

Comments
 (0)