Skip to content

Commit

Permalink
expect regex extracted tokens in database bloom filters (#103)
Browse files Browse the repository at this point in the history
* expect regex extracted tokens in database bloom filters

* add RegexExtractedValueTest

* remove unnecessary test

* TokenizedValue: call tokenizer only when needed, clean up tests

* clear up exception message in BloomFilterFromRecord

* BloomFilterFromRecord: remove ULong.longValue() from constructor, clarify exception messages. Add tests for exceptions.

* set logger level to debug when indexstatement is reached with bloom disabled

* remove consumer class and use for loop in TableFilters

* use try with resources and add comments on equals methods about DSLContext equality

* add Tokenizable interface and decorators, rename BloomFilterFromRecord and make it unconfigurable, make matcher immutable

* add missing assertion to test

* move method after constructors

* fix hard-coded filter size and fix the test that verifies different filter sizes are applied

* refactor code to simplify, add testing for SQL temp table values created by bloom operations

* use UncheckedIOException constructor

* TableFilters returns a batch that CategoryTableWithFilters executes

* add test for SafeBatch

* update comments and clean up code, add constructors for RegexLikeCondition

* use qualified names and update tests

* more descriptive naming of methods and variables, update comments, javadoc and exception messages

* apply spotless

* add missing hashCode() methods

* don't wrap the jooq.Batch object; execute it in CategoryTableWithFilters instead, and remove System.out.println (sout) debug output

* throw an exception if the search term filter token set size is larger than expected, remove logcaptor dependency

* improve TableFiltersTest and TokensAsStringsTest

* allow search term filter tokens to be larger than expected tokens
  • Loading branch information
elliVM authored Nov 11, 2024
1 parent a18c8b3 commit 51c80ca
Show file tree
Hide file tree
Showing 36 changed files with 1,458 additions and 543 deletions.
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@
<version>2.2.224</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>nl.jqno.equalsverifier</groupId>
<artifactId>equalsverifier</artifactId>
<version>3.16.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,10 @@ private Table<Record> getTableStatement(Condition journaldbCondition, Date day)
.on(JOURNALDB.LOGFILE.HOST_ID.eq(GetArchivedObjectsFilterTable.host_id).and(JOURNALDB.LOGFILE.LOGTAG.eq(GetArchivedObjectsFilterTable.tag)));

if (bloomEnabled) {
Set<Table<?>> tables = walker.patternMatchTables();
// join all tables needed for the condition generated by walker
final Set<Table<?>> tables = walker.conditionRequiredTables();
if (!tables.isEmpty()) {
for (Table<?> table : tables) {
for (final Table<?> table : tables) {
if (LOGGER.isInfoEnabled()) {
LOGGER.info("Left join pattern match table: <{}>", table.getName());
}
Expand Down
168 changes: 0 additions & 168 deletions src/main/java/com/teragrep/pth_06/planner/TableFilters.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,10 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
package com.teragrep.pth_06.planner;

import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
package com.teragrep.pth_06.planner.bloomfilter;

public interface CategoryTable {

void create();

void insertFilters();

QueryCondition bloommatchCondition();
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
package com.teragrep.pth_06.planner;
package com.teragrep.pth_06.planner.bloomfilter;

import com.teragrep.pth_06.config.ConditionConfig;
import com.teragrep.pth_06.planner.walker.conditions.CategoryTableCondition;
import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
import org.jooq.*;
import org.jooq.impl.DSL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Objects;

import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED;

/**
Expand Down Expand Up @@ -88,40 +88,25 @@ public final class CategoryTableImpl implements CategoryTable {
private final DSLContext ctx;
private final Table<?> originTable;
private final long bloomTermId;
private final CategoryTableCondition tableCondition;
private final TableFilters tableFilters;

public CategoryTableImpl(ConditionConfig config, Table<?> originTable, String value) {
this(
config.context(),
originTable,
config.bloomTermId(),
new CategoryTableCondition(originTable, config.bloomTermId()),
new TableFilters(config.context(), originTable, config.bloomTermId(), value)
);
}

public CategoryTableImpl(DSLContext ctx, Table<?> originTable, long bloomTermId, String value) {
this(
ctx,
originTable,
bloomTermId,
new CategoryTableCondition(originTable, bloomTermId),
new TableFilters(ctx, originTable, bloomTermId, value)
);
this(ctx, originTable, bloomTermId, new TableFilters(ctx, originTable, bloomTermId, value));
}

public CategoryTableImpl(
DSLContext ctx,
Table<?> originTable,
long bloomTermId,
CategoryTableCondition tableCondition,
TableFilters tableFilters
) {
public CategoryTableImpl(DSLContext ctx, Table<?> originTable, long bloomTermId, TableFilters tableFilters) {
this.ctx = ctx;
this.originTable = originTable;
this.bloomTermId = bloomTermId;
this.tableCondition = tableCondition;
this.tableFilters = tableFilters;
}

Expand All @@ -144,24 +129,12 @@ public void create() {
indexStep.execute();
}

public void insertFilters() {
tableFilters.insertFiltersIntoCategoryTable();
}

/**
* Row condition that selects the same sized filter arrays from this category table and the origin table.
*
* @return condition
*/
public QueryCondition bloommatchCondition() {
return tableCondition;
}

/**
* Equal only if all object parameters are same value and the instances of DSLContext are same
* Equal if the compared object is the same instance or if the compared object is of the same class, object fields
* are equal, and DSLContext is the same instance
*
* @param object object compared against
* @return true if all object is same class, object fields are equal and DSLContext is same instance
* @return true if equal
*/
@Override
public boolean equals(final Object object) {
Expand All @@ -175,4 +148,9 @@ public boolean equals(final Object object) {
return this.originTable.equals(cast.originTable) && this.ctx == cast.ctx && // equal only if same instance of DSLContext
this.bloomTermId == cast.bloomTermId && this.tableFilters.equals(cast.tableFilters);
}

/**
 * Hash code built from the same four fields that equals() compares: the DSLContext, the origin table, the bloom
 * term id and the table filters.
 *
 * @return combined hash of the object fields
 */
@Override
public int hashCode() {
    // the array is handed to the varargs parameter as-is, so the result matches listing the fields inline
    final Object[] hashedFields = {
            ctx, originTable, bloomTermId, tableFilters
    };
    return Objects.hash(hashedFields);
}
}
Loading

0 comments on commit 51c80ca

Please sign in to comment.