Skip to content

Commit 5e7d468

Browse files
authored
Read interface support predicate (#22)
1 parent 0d0237b commit 5e7d468

File tree

11 files changed

+735
-8
lines changed

11 files changed

+735
-8
lines changed

.github/workflows/paimon-python-checks.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,14 @@ jobs:
4343
with:
4444
java-version: ${{ env.JDK_VERSION }}
4545
distribution: 'adopt'
46+
- name: Set up hadoop dependency
47+
run: |
48+
mkdir -p ${{ github.workspace }}/temp
49+
curl -L -o ${{ github.workspace }}/temp/bundled-hadoop.jar \
50+
https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
4651
- name: Run lint-python.sh
52+
env:
53+
_PYPAIMON_HADOOP_CLASSPATH: ${{ github.workspace }}/temp/bundled-hadoop.jar
4754
run: |
4855
chmod +x dev/lint-python.sh
4956
./dev/lint-python.sh

paimon_python_api/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from .split import Split
2020
from .table_read import TableRead
2121
from .table_scan import TableScan, Plan
22+
from .predicate import Predicate, PredicateBuilder
2223
from .read_builder import ReadBuilder
2324
from .commit_message import CommitMessage
2425
from .table_commit import BatchTableCommit
@@ -39,5 +40,7 @@
3940
'BatchWriteBuilder',
4041
'Table',
4142
'Schema',
42-
'Catalog'
43+
'Catalog',
44+
'Predicate',
45+
'PredicateBuilder'
4346
]

paimon_python_api/predicate.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
################################################################################
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#################################################################################
18+
19+
from abc import ABC, abstractmethod
20+
from typing import Any, List
21+
22+
23+
class Predicate(ABC):
24+
"""Predicate which evaluates to a boolean. Now it doesn't have
25+
any methods because only paimon_python_java implement it and
26+
the Java implementation convert it to Java object."""
27+
28+
29+
class PredicateBuilder(ABC):
30+
"""A utility class to create Predicate object for common filter conditions."""
31+
32+
@abstractmethod
33+
def equal(self, field: str, literal: Any) -> Predicate:
34+
"""field = literal"""
35+
36+
@abstractmethod
37+
def not_equal(self, field: str, literal: Any) -> Predicate:
38+
"""field <> literal"""
39+
40+
@abstractmethod
41+
def less_than(self, field: str, literal: Any) -> Predicate:
42+
"""field < literal"""
43+
44+
@abstractmethod
45+
def less_or_equal(self, field: str, literal: Any) -> Predicate:
46+
"""field <= literal"""
47+
48+
@abstractmethod
49+
def greater_than(self, field: str, literal: Any) -> Predicate:
50+
"""field > literal"""
51+
52+
@abstractmethod
53+
def greater_or_equal(self, field: str, literal: Any) -> Predicate:
54+
"""field >= literal"""
55+
56+
@abstractmethod
57+
def is_null(self, field: str) -> Predicate:
58+
"""field IS NULL"""
59+
60+
@abstractmethod
61+
def is_not_null(self, field: str) -> Predicate:
62+
"""field IS NOT NULL"""
63+
64+
@abstractmethod
65+
def startswith(self, field: str, pattern_literal: Any) -> Predicate:
66+
"""field.startswith"""
67+
68+
@abstractmethod
69+
def endswith(self, field: str, pattern_literal: Any) -> Predicate:
70+
"""field.endswith()"""
71+
72+
@abstractmethod
73+
def contains(self, field: str, pattern_literal: Any) -> Predicate:
74+
"""literal in field"""
75+
76+
@abstractmethod
77+
def is_in(self, field: str, literals: List[Any]) -> Predicate:
78+
"""field IN literals"""
79+
80+
@abstractmethod
81+
def is_not_in(self, field: str, literals: List[Any]) -> Predicate:
82+
"""field NOT IN literals"""
83+
84+
@abstractmethod
85+
def between(self, field: str, included_lower_bound: Any, included_upper_bound: Any) \
86+
-> Predicate:
87+
"""field BETWEEN included_lower_bound AND included_upper_bound"""
88+
89+
@abstractmethod
90+
def and_predicates(self, predicates: List[Predicate]) -> Predicate:
91+
"""predicate1 AND predicate2 AND ..."""
92+
93+
@abstractmethod
94+
def or_predicates(self, predicates: List[Predicate]) -> Predicate:
95+
"""predicate1 OR predicate2 OR ..."""

paimon_python_api/read_builder.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,20 @@
1717
#################################################################################
1818

1919
from abc import ABC, abstractmethod
20-
from paimon_python_api import TableRead, TableScan
20+
from paimon_python_api import TableRead, TableScan, Predicate, PredicateBuilder
2121
from typing import List
2222

2323

2424
class ReadBuilder(ABC):
2525
"""An interface for building the TableScan and TableRead."""
2626

27+
@abstractmethod
28+
def with_filter(self, predicate: Predicate):
29+
"""
30+
Push filters, will filter the data as much as possible,
31+
but it is not guaranteed that it is a complete filter.
32+
"""
33+
2734
@abstractmethod
2835
def with_projection(self, projection: List[List[int]]) -> 'ReadBuilder':
2936
"""Push nested projection."""
@@ -39,3 +46,7 @@ def new_scan(self) -> TableScan:
3946
@abstractmethod
4047
def new_read(self) -> TableRead:
4148
"""Create a TableRead to read splits."""
49+
50+
@abstractmethod
51+
def new_predicate_builder(self) -> PredicateBuilder:
52+
"""Create a builder for Predicate."""

paimon_python_java/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818

1919
from .util import constants
2020
from .pypaimon import (Catalog, Table, ReadBuilder, TableScan, Plan, Split, TableRead,
21-
BatchWriteBuilder, BatchTableWrite, CommitMessage, BatchTableCommit)
21+
BatchWriteBuilder, BatchTableWrite, CommitMessage, BatchTableCommit,
22+
Predicate, PredicateBuilder)
2223

2324
__all__ = [
2425
'constants',
@@ -32,5 +33,7 @@
3233
'BatchWriteBuilder',
3334
'BatchTableWrite',
3435
'CommitMessage',
35-
'BatchTableCommit'
36+
'BatchTableCommit',
37+
'Predicate',
38+
'PredicateBuilder'
3639
]

paimon_python_java/gateway_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def _get_hadoop_classpath(env):
103103
return env[constants.PYPAIMON_HADOOP_CLASSPATH]
104104

105105
if 'HADOOP_CLASSPATH' in env:
106-
return None
106+
return env['HADOOP_CLASSPATH']
107107
else:
108108
raise EnvironmentError(f"You haven't set '{constants.PYPAIMON_HADOOP_CLASSPATH}', \
109109
and 'HADOOP_CLASSPATH' is also not set. Ensure one of them is set.")

paimon_python_java/java_gateway.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ def import_paimon_view(gateway):
109109
java_import(gateway.jvm, 'org.apache.paimon.types.*')
110110
java_import(gateway.jvm, 'org.apache.paimon.python.*')
111111
java_import(gateway.jvm, "org.apache.paimon.data.*")
112+
java_import(gateway.jvm, "org.apache.paimon.predicate.PredicateBuilder")
112113

113114

114115
class Watchdog(object):
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.paimon.python;
20+
21+
import org.apache.paimon.data.BinaryString;
22+
import org.apache.paimon.predicate.Predicate;
23+
import org.apache.paimon.predicate.PredicateBuilder;
24+
import org.apache.paimon.types.DataType;
25+
import org.apache.paimon.types.RowType;
26+
27+
import java.util.List;
28+
import java.util.stream.Collectors;
29+
30+
/** For building Predicate. */
31+
public class PredicationUtil {
32+
33+
public static Predicate build(
34+
RowType rowType,
35+
PredicateBuilder builder,
36+
String method,
37+
int index,
38+
List<Object> literals) {
39+
literals =
40+
literals.stream()
41+
.map(l -> convertJavaObject(rowType.getTypeAt(index), l))
42+
.collect(Collectors.toList());
43+
switch (method) {
44+
case "equal":
45+
return builder.equal(index, literals.get(0));
46+
case "notEqual":
47+
return builder.notEqual(index, literals.get(0));
48+
case "lessThan":
49+
return builder.lessThan(index, literals.get(0));
50+
case "lessOrEqual":
51+
return builder.lessOrEqual(index, literals.get(0));
52+
case "greaterThan":
53+
return builder.greaterThan(index, literals.get(0));
54+
case "greaterOrEqual":
55+
return builder.greaterOrEqual(index, literals.get(0));
56+
case "isNull":
57+
return builder.isNull(index);
58+
case "isNotNull":
59+
return builder.isNotNull(index);
60+
case "startsWith":
61+
return builder.startsWith(index, literals.get(0));
62+
case "endsWith":
63+
return builder.endsWith(index, literals.get(0));
64+
case "contains":
65+
return builder.contains(index, literals.get(0));
66+
case "in":
67+
return builder.in(index, literals);
68+
case "notIn":
69+
return builder.notIn(index, literals);
70+
case "between":
71+
return builder.between(index, literals.get(0), literals.get(1));
72+
default:
73+
throw new UnsupportedOperationException(
74+
"Unknown PredicateBuilder method " + method);
75+
}
76+
}
77+
78+
/** Some type is not convenient to transfer from Python to Java. */
79+
private static Object convertJavaObject(DataType literalType, Object literal) {
80+
switch (literalType.getTypeRoot()) {
81+
case BOOLEAN:
82+
case DOUBLE:
83+
case INTEGER:
84+
return literal;
85+
case CHAR:
86+
case VARCHAR:
87+
return BinaryString.fromString((String) literal);
88+
case FLOAT:
89+
return ((Number) literal).floatValue();
90+
case TINYINT:
91+
return ((Number) literal).byteValue();
92+
case SMALLINT:
93+
return ((Number) literal).shortValue();
94+
case BIGINT:
95+
return ((Number) literal).longValue();
96+
default:
97+
throw new UnsupportedOperationException(
98+
"Unsupported predicate leaf type " + literalType.getTypeRoot().name());
99+
}
100+
}
101+
102+
public static Predicate buildAnd(List<Predicate> predicates) {
103+
// 'and' is keyword of Python
104+
return PredicateBuilder.and(predicates);
105+
}
106+
107+
public static Predicate buildOr(List<Predicate> predicates) {
108+
// 'or' is keyword of Python
109+
return PredicateBuilder.or(predicates);
110+
}
111+
}

0 commit comments

Comments
 (0)