Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,11 @@ protected BehaviorChangeConfiguration(
+ " unlimited locations")
.defaultValue(-1)
.buildBehaviorChangeConfiguration();

public static final BehaviorChangeConfiguration<Boolean> ENTITY_CACHE_SOFT_VALUES =
PolarisConfiguration.<Boolean>builder()
.key("ENTITY_CACHE_SOFT_VALUES")
.description("Whether or not to use soft values in the entity cache")
.defaultValue(false)
.buildBehaviorChangeConfiguration();
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.List;
import java.util.Optional;
import org.apache.polaris.core.admin.model.StorageConfigInfo;
import org.apache.polaris.core.persistence.cache.EntityWeigher;

/**
* Configurations for features within Polaris. These configurations are intended to be customized
Expand Down Expand Up @@ -190,4 +191,14 @@ protected FeatureConfiguration(
.description("If true, the generic-tables endpoints are enabled")
.defaultValue(false)
.buildFeatureConfiguration();

public static final FeatureConfiguration<Long> ENTITY_CACHE_WEIGHER_TARGET =
PolarisConfiguration.<Long>builder()
.key("ENTITY_CACHE_WEIGHER_TARGET")
.description(
"The maximum weight for the entity cache. This is a heuristic value without any particular"
+ " unit of measurement. It roughly correlates with the total heap size of cached values. Fine-tuning"
+ " requires experimentation in the specific deployment environment")
.defaultValue(100 * EntityWeigher.WEIGHT_PER_MB)
.buildFeatureConfiguration();
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import org.apache.polaris.core.PolarisCallContext;
import org.apache.polaris.core.config.BehaviorChangeConfiguration;
import org.apache.polaris.core.config.FeatureConfiguration;
import org.apache.polaris.core.config.PolarisConfiguration;
import org.apache.polaris.core.entity.PolarisBaseEntity;
import org.apache.polaris.core.entity.PolarisEntityType;
import org.apache.polaris.core.entity.PolarisGrantRecord;
Expand Down Expand Up @@ -72,14 +75,21 @@ public EntityCache(@Nonnull PolarisMetaStoreManager polarisMetaStoreManager) {
}
};

// use a Caffeine cache to purge entries when those have not been used for a long time.
// Assuming 1KB per entry, 100K entries is about 100MB.
this.byId =
long weigherTarget =
PolarisConfiguration.loadConfig(FeatureConfiguration.ENTITY_CACHE_WEIGHER_TARGET);
Caffeine<Long, ResolvedPolarisEntity> byIdBuilder =
Caffeine.newBuilder()
.maximumSize(100_000) // Set maximum size to 100,000 elements
.maximumWeight(weigherTarget)
.weigher(EntityWeigher.asWeigher())
.expireAfterAccess(1, TimeUnit.HOURS) // Expire entries after 1 hour of no access
.removalListener(removalListener) // Set the removal listener
.build();
.removalListener(removalListener); // Set the removal listener

if (PolarisConfiguration.loadConfig(BehaviorChangeConfiguration.ENTITY_CACHE_SOFT_VALUES)) {
byIdBuilder.softValues();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to clarify: What is our expected benefit from using soft values?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my testing, it improved performance and prevented situations where a large cache lead to OOMs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this feedback! I'm glad to see that it had positive impact in you env 👍

However, my experience with soft references in cache is not so bright and I'd advise caution, especially when Parallel GC is used. It may be able to recover from close-to-OOM conditions, but API response times are likely to suffer due to GC pauses.

Given that the default is off (not using soft values). I'm fine with this PR in general.

}

// use a Caffeine cache to purge entries when those have not been used for a long time.
this.byId = byIdBuilder.build();

// remember the meta store manager
this.polarisMetaStoreManager = polarisMetaStoreManager;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.persistence.cache;

import com.github.benmanes.caffeine.cache.Weigher;
import org.apache.polaris.core.persistence.ResolvedPolarisEntity;
import org.checkerframework.checker.index.qual.NonNegative;

/**
* A {@link Weigher} implementation that weighs {@link ResolvedPolarisEntity} objects by the
* approximate size of the entity object.
*/
public class EntityWeigher implements Weigher<Long, ResolvedPolarisEntity> {

/** The amount of weight that is expected to roughly equate to 1MB of memory usage */
public static final long WEIGHT_PER_MB = 1024 * 1024;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this constant anymore? Why not inline its value in the config default?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To me the constant seems somewhat coupled to the weigher -- i.e. another weigher implementation could treat 1 string character as 1000 weight units, and would have a very different WEIGHT_PER_MB.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this constant is specifically mean for the config default?

Copy link
Contributor Author

@eric-maynard eric-maynard Apr 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pretty much yep. I also thought it was a helpful constant to have for anyone trying to read this code rather than us just setting the default to e.g. 104857600 and leaving its interpretation as an exercise for the reader.

On the other hand I can see how it's potentially misleading given the extended discussion around the amount of heap we expect a given weight to use, so I'm OK with yanking the constant if you think it's problematic.

edit: We could also consider renaming EntityWeigher to ApproximateStringSizeEntityWeigher or something more idiomatic. That would make this ApproximateStringSizeEntityWeigher.WEIGHT_PER_MB which would be more explicitly "approximate" and coupled to the weigher implementation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EntityWeigher SGTM... I believe approximation is implied in its name and API :)

Re: constant, how about renaming it to WEIGHT_LIMIT_DEFAULT and using without extra modifications in the config variable definition?


/* Represents the approximate size of an entity beyond the properties */
private static final int APPROXIMATE_ENTITY_OVERHEAD = 1000;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does the value 1000 come from?

Copy link
Contributor Author

@eric-maynard eric-maynard Nov 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See this old comment -- the apparent assumption was that entities are generally ~1KB.

This means in the worst case, we will not get a larger number of entries due to this change. It should strictly be smaller.


/* Represents the amount of bytes that a character is expected to take up */
private static final int APPROXIMATE_BYTES_PER_CHAR = 3;

/** Singleton instance */
private static final EntityWeigher instance = new EntityWeigher();

private EntityWeigher() {}

/** Gets the singleton {@link EntityWeigher} */
public static EntityWeigher getInstance() {
return instance;
}

/**
* Computes the weight of a given entity. The unit here is not exactly bytes, but it's close.
*
* @param key The entity's key; not used
* @param value The entity to be cached
* @return The weight of the entity
*/
@Override
public @NonNegative int weigh(Long key, ResolvedPolarisEntity value) {
return APPROXIMATE_ENTITY_OVERHEAD
+ (value.getEntity().getName().length() * APPROXIMATE_BYTES_PER_CHAR)
+ (value.getEntity().getProperties().length() * APPROXIMATE_BYTES_PER_CHAR)
+ (value.getEntity().getInternalProperties().length() * APPROXIMATE_BYTES_PER_CHAR);
Comment on lines +59 to +62
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The constants in this expression do not "approximate" any actual sizes. I suggest: ENTITY_SIZE_BOUND_HEURISTIC, CALIBRATED_BYTES_PER_CHAR, plus add javadoc about how we arrived at these calibration parameters.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, they do approximate actual sizes. The testing described above details that approximately APPROXIMATE_BYTES_PER_CHAR of heap pressure are applied for each character in the entity that's being stored.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can leave this "as is" for now. Putting a more general comment in the main thread.

}

/** Factory method to provide a typed Weigher */
public static Weigher<Long, ResolvedPolarisEntity> asWeigher() {
return getInstance();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,26 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.persistence;
package org.apache.polaris.core.persistence.cache;

import static org.apache.polaris.core.persistence.PrincipalSecretsGenerator.RANDOM_SECRETS;

import java.util.List;
import java.util.stream.Collectors;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.polaris.core.PolarisCallContext;
import org.apache.polaris.core.PolarisDefaultDiagServiceImpl;
import org.apache.polaris.core.PolarisDiagnostics;
import org.apache.polaris.core.entity.PolarisBaseEntity;
import org.apache.polaris.core.entity.PolarisEntity;
import org.apache.polaris.core.entity.PolarisEntitySubType;
import org.apache.polaris.core.entity.PolarisEntityType;
import org.apache.polaris.core.entity.PolarisGrantRecord;
import org.apache.polaris.core.entity.PolarisPrivilege;
import org.apache.polaris.core.persistence.cache.EntityCache;
import org.apache.polaris.core.persistence.cache.EntityCacheByNameKey;
import org.apache.polaris.core.persistence.cache.EntityCacheLookupResult;
import org.apache.polaris.core.entity.table.IcebergTableLikeEntity;
import org.apache.polaris.core.persistence.PolarisMetaStoreManager;
import org.apache.polaris.core.persistence.PolarisTestMetaStoreManager;
import org.apache.polaris.core.persistence.ResolvedPolarisEntity;
import org.apache.polaris.core.persistence.transactional.TransactionalMetaStoreManagerImpl;
import org.apache.polaris.core.persistence.transactional.TransactionalPersistence;
import org.apache.polaris.core.persistence.transactional.TreeMapMetaStore;
Expand Down Expand Up @@ -478,4 +481,26 @@ void testRenameAndCacheDestinationBeforeLoadingSource() {
// now the loading by the old name should return null
Assertions.assertThat(cache.getOrLoadEntityByName(callCtx, T4_name)).isNull();
}

/* Helper for `testEntityWeigher` */
private int getEntityWeight(PolarisEntity entity) {
return EntityWeigher.getInstance()
.weigh(-1L, new ResolvedPolarisEntity(diagServices, entity, List.of(), 1));
}

@Test
void testEntityWeigher() {
var smallEntity = new IcebergTableLikeEntity.Builder(TableIdentifier.of("ns.t1"), "").build();
var mediumEntity =
new IcebergTableLikeEntity.Builder(TableIdentifier.of("ns.t1"), "")
.setMetadataLocation("a".repeat(10000))
.build();
var largeEntity =
new IcebergTableLikeEntity.Builder(TableIdentifier.of("ns.t1"), "")
.setMetadataLocation("a".repeat(1000 * 1000))
.build();

Assertions.assertThat(getEntityWeight(smallEntity)).isLessThan(getEntityWeight(mediumEntity));
Assertions.assertThat(getEntityWeight(mediumEntity)).isLessThan(getEntityWeight(largeEntity));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.persistence.cache;

import java.util.List;
import java.util.Optional;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.polaris.core.PolarisDefaultDiagServiceImpl;
import org.apache.polaris.core.PolarisDiagnostics;
import org.apache.polaris.core.entity.table.IcebergTableLikeEntity;
import org.apache.polaris.core.persistence.ResolvedPolarisEntity;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;

public class EntityWeigherTest {

private PolarisDiagnostics diagnostics;

public EntityWeigherTest() {
diagnostics = new PolarisDefaultDiagServiceImpl();
}

private ResolvedPolarisEntity getEntity(
String name,
String metadataLocation,
String properties,
Optional<String> internalProperties) {
var entity =
new IcebergTableLikeEntity.Builder(TableIdentifier.of(name), metadataLocation).build();
entity.setProperties(properties);
internalProperties.ifPresent(p -> entity.setInternalProperties(p));
return new ResolvedPolarisEntity(diagnostics, entity, List.of(), 1);
}

@Test
public void testBasicWeight() {
int weight = EntityWeigher.getInstance().weigh(1L, getEntity("t", "", "", Optional.empty()));
Assertions.assertThat(weight).isGreaterThan(0);
}

@Test
public void testNonZeroWeight() {
int weight = EntityWeigher.getInstance().weigh(1L, getEntity("t", "", "", Optional.of("")));
Assertions.assertThat(weight).isGreaterThan(0);
}

@Test
public void testWeightIncreasesWithNameLength() {
int smallWeight =
EntityWeigher.getInstance().weigh(1L, getEntity("t", "", "", Optional.empty()));
int largeWeight =
EntityWeigher.getInstance().weigh(1L, getEntity("looong name", "", "", Optional.empty()));
Assertions.assertThat(smallWeight).isLessThan(largeWeight);
}

@Test
public void testWeightIncreasesWithMetadataLocationLength() {
int smallWeight =
EntityWeigher.getInstance().weigh(1L, getEntity("t", "", "", Optional.empty()));
int largeWeight =
EntityWeigher.getInstance()
.weigh(1L, getEntity("t", "looong location", "", Optional.empty()));
Assertions.assertThat(smallWeight).isLessThan(largeWeight);
}

@Test
public void testWeightIncreasesWithPropertiesLength() {
int smallWeight =
EntityWeigher.getInstance().weigh(1L, getEntity("t", "", "", Optional.empty()));
int largeWeight =
EntityWeigher.getInstance()
.weigh(1L, getEntity("t", "", "looong properties", Optional.empty()));
Assertions.assertThat(smallWeight).isLessThan(largeWeight);
}

@Test
public void testWeightIncreasesWithInternalPropertiesLength() {
int smallWeight =
EntityWeigher.getInstance().weigh(1L, getEntity("t", "", "", Optional.of("")));
int largeWeight =
EntityWeigher.getInstance()
.weigh(1L, getEntity("t", "", "", Optional.of("looong iproperties")));
Assertions.assertThat(smallWeight).isLessThan(largeWeight);
}

@Test
public void testExactWeightCalculation() {
int preciseWeight =
EntityWeigher.getInstance()
.weigh(1L, getEntity("name", "location", "{a: b}", Optional.of("{c: d, e: f}")));
Assertions.assertThat(preciseWeight).isEqualTo(1066);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@ void dropEntity(List<PolarisEntityCore> catalogPath, PolarisBaseEntity entityToD
}

/** Grant a privilege to a catalog role */
void grantPrivilege(
public void grantPrivilege(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes here look unrelated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are not; the EntityCacheTest was moved into a new package and so these methods were no longer callable from it if they remained package-private

PolarisBaseEntity role,
List<PolarisEntityCore> catalogPath,
PolarisBaseEntity securable,
Expand Down Expand Up @@ -1291,7 +1291,7 @@ PolarisBaseEntity createTestCatalog(String catalogName) {
*
* @return the identity we found
*/
PolarisBaseEntity ensureExistsByName(
public PolarisBaseEntity ensureExistsByName(
List<PolarisEntityCore> catalogPath,
PolarisEntityType entityType,
PolarisEntitySubType entitySubType,
Expand Down Expand Up @@ -1337,7 +1337,7 @@ PolarisBaseEntity ensureExistsByName(
*
* @return the identity we found
*/
PolarisBaseEntity ensureExistsByName(
public PolarisBaseEntity ensureExistsByName(
List<PolarisEntityCore> catalogPath, PolarisEntityType entityType, String name) {
return this.ensureExistsByName(
catalogPath, entityType, PolarisEntitySubType.NULL_SUBTYPE, name);
Expand All @@ -1352,7 +1352,7 @@ PolarisBaseEntity ensureExistsByName(
* @param internalProps updated internal properties
* @return updated entity
*/
PolarisBaseEntity updateEntity(
public PolarisBaseEntity updateEntity(
List<PolarisEntityCore> catalogPath,
PolarisBaseEntity entity,
String props,
Expand Down Expand Up @@ -1844,7 +1844,7 @@ void validateBootstrap() {
this.ensureGrantRecordExists(principalRole, principal, PolarisPrivilege.PRINCIPAL_ROLE_USAGE);
}

void testCreateTestCatalog() {
public void testCreateTestCatalog() {
// create test catalog
this.createTestCatalog("test");

Expand Down Expand Up @@ -2418,7 +2418,7 @@ public void testPrivileges() {
* @param newCatPath new catalog path
* @param newName new name
*/
void renameEntity(
public void renameEntity(
List<PolarisEntityCore> catPath,
PolarisBaseEntity entity,
List<PolarisEntityCore> newCatPath,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,6 @@ public TestServices build() {
RealmEntityManagerFactory realmEntityManagerFactory =
new RealmEntityManagerFactory(metaStoreManagerFactory) {};

PolarisEntityManager entityManager =
realmEntityManagerFactory.getOrCreateEntityManager(realmContext);
PolarisMetaStoreManager metaStoreManager =
metaStoreManagerFactory.getOrCreateMetaStoreManager(realmContext);
TransactionalPersistence metaStoreSession =
metaStoreManagerFactory.getOrCreateSessionSupplier(realmContext).get();
CallContext callContext =
Expand All @@ -160,6 +156,11 @@ public Map<String, Object> contextVariables() {
return new HashMap<>();
}
};
CallContext.setCurrentContext(callContext);
PolarisEntityManager entityManager =
realmEntityManagerFactory.getOrCreateEntityManager(realmContext);
PolarisMetaStoreManager metaStoreManager =
metaStoreManagerFactory.getOrCreateMetaStoreManager(realmContext);

FileIOFactory fileIOFactory =
fileIOFactorySupplier.apply(
Expand Down