
Commit 0be8175

#40 Provide Optional Built-in Hadoop Dependencies (#41)
1 parent af06c70 commit 0be8175

File tree

7 files changed (+136, -20 lines)


MANIFEST.in

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@

 global-exclude *.py[cod] __pycache__ .DS_Store
 recursive-include deps/jars *.jar
+recursive-include deps/hadoop *.jar
 include README.md
 include LICENSE
 include NOTICE

dev/build-source-distribution-package.sh

Lines changed: 21 additions & 7 deletions
@@ -19,20 +19,34 @@ CURR_DIR=`pwd`
 BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
 PROJECT_ROOT="${BASE_DIR}/../"

-# prepare bridge jar
-
-DEPS_DIR=${PROJECT_ROOT}/deps/jars
+DEPS_DIR=${PROJECT_ROOT}/deps
 rm -rf ${DEPS_DIR}
-mkdir -p ${DEPS_DIR}
-touch ${DEPS_DIR}/__init__.py
+
+# prepare bridge jar
+BRIDGE_DEPS_DIR=${DEPS_DIR}/jars
+mkdir -p ${BRIDGE_DEPS_DIR}
+touch ${BRIDGE_DEPS_DIR}/__init__.py

 cd ${PROJECT_ROOT}/paimon-python-java-bridge

 # get bridge jar version
-JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+BRIDGE_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+
+mvn clean install -DskipTests
+cp "target/paimon-python-java-bridge-${BRIDGE_JAR_VERSION}.jar" ${BRIDGE_DEPS_DIR}
+
+# prepare hadoop-deps jar
+HADOOP_DEPS_DIR=${DEPS_DIR}/hadoop
+mkdir -p ${HADOOP_DEPS_DIR}
+touch ${HADOOP_DEPS_DIR}/__init__.py
+
+cd ${PROJECT_ROOT}/hadoop-deps
+
+# get hadoop-deps jar version
+HADOOP_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)

 mvn clean install -DskipTests
-cp "target/paimon-python-java-bridge-${JAR_VERSION}.jar" ${DEPS_DIR}
+cp "target/hadoop-deps-${HADOOP_JAR_VERSION}.jar" ${HADOOP_DEPS_DIR}

 cd ${CURR_DIR}

dev/lint-python.sh

Lines changed: 2 additions & 0 deletions
@@ -580,6 +580,8 @@ function tox_check() {
 # dummy jar needed by setup.py
 mkdir -p $PAIMON_PYTHON_DIR/deps/jars
 touch $PAIMON_PYTHON_DIR/deps/jars/dummy.jar
+mkdir -p $PAIMON_PYTHON_DIR/deps/hadoop
+touch $PAIMON_PYTHON_DIR/deps/hadoop/dummy.jar

 if [[ -n "$GITHUB_ACTION" ]]; then
 # Run tests in all versions triggered by a Git push (tests aren't so many currently)

hadoop-deps/pom.xml

Lines changed: 73 additions & 0 deletions
New file:

<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~     http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.apache.pypaimon</groupId>
    <artifactId>hadoop-deps</artifactId>
    <version>3.3.4</version>

    <properties>
        <hadoop.version>3.3.4</hadoop.version>
        <log4j.version>2.17.1</log4j.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <createDependencyReducedPom>false</createDependencyReducedPom>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
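
The pom above shades hadoop-common, hadoop-hdfs-client, and log4j-api into a single hadoop-deps jar, with ServicesResourceTransformer merging the META-INF/services files that Hadoop's FileSystem loader relies on. As an illustrative spot check (not part of this commit, and assuming Maven's default target/ output path), the shaded artifact can be inspected from Python:

# Hypothetical check that the shaded jar really bundles Hadoop classes and
# merged service-loader files; the jar path is an assumption.
import zipfile

with zipfile.ZipFile("hadoop-deps/target/hadoop-deps-3.3.4.jar") as jar:
    entries = jar.namelist()
    # Hadoop classes should be packaged into the jar itself.
    assert any(n.startswith("org/apache/hadoop/") and n.endswith(".class") for n in entries)
    # ServicesResourceTransformer should have produced merged service files.
    assert any(n.startswith("META-INF/services/") for n in entries)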

pypaimon/py4j/gateway_server.py

Lines changed: 12 additions & 4 deletions
@@ -102,12 +102,20 @@ def _get_classpath(env):
     return os.pathsep.join(classpath)
 
 
+_HADOOP_DEPS_PACKAGE = 'pypaimon.hadoop-deps'
+
+
 def _get_hadoop_classpath(env):
     if constants.PYPAIMON_HADOOP_CLASSPATH in env:
         return env[constants.PYPAIMON_HADOOP_CLASSPATH]
-
-    if 'HADOOP_CLASSPATH' in env:
+    elif 'HADOOP_CLASSPATH' in env:
         return env['HADOOP_CLASSPATH']
     else:
-        raise EnvironmentError(f"You haven't set '{constants.PYPAIMON_HADOOP_CLASSPATH}', \
-and 'HADOOP_CLASSPATH' is also not set. Ensure one of them is set.")
+        # use built-in hadoop
+        jars = importlib.resources.files(_HADOOP_DEPS_PACKAGE)
+        one_jar = next(iter(jars.iterdir()), None)
+        if not one_jar:
+            raise EnvironmentError(f"The built-in Hadoop environment has been broken, this \
+is unexpected. You can set one of '{constants.PYPAIMON_HADOOP_CLASSPATH}' or \
+'HADOOP_CLASSPATH' to continue.")
+        return os.path.join(os.path.dirname(str(one_jar)), '*')
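
With this change the lookup order is: the pypaimon-specific classpath variable, then HADOOP_CLASSPATH, then the jars bundled in the pypaimon.hadoop-deps package, so the built-in Hadoop dependencies only kick in when nothing else is configured. A rough usage sketch, assuming the module layout shown above and calling the private helper purely for illustration:

# Illustrative only: with no Hadoop-related variables set, the fallback returns a
# wildcard over the bundled jar directory, e.g. ".../site-packages/pypaimon/hadoop-deps/*".
import os

from pypaimon.py4j import gateway_server

env = dict(os.environ)
env.pop('HADOOP_CLASSPATH', None)  # the pypaimon-specific variable is assumed unset as well
print(gateway_server._get_hadoop_classpath(env))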

setup.py

Lines changed: 6 additions & 3 deletions
@@ -38,7 +38,8 @@
     'pypaimon.api',
     'pypaimon.py4j',
     'pypaimon.py4j.util',
-    'pypaimon.jars'
+    'pypaimon.jars',
+    'pypaimon.hadoop-deps'
 ]
 
 install_requires = [
@@ -57,10 +58,12 @@
     include_package_data=True,
     # releasing tool will generate deps
     package_dir={
-        "pypaimon.jars": "deps/jars"
+        "pypaimon.jars": "deps/jars",
+        "pypaimon.hadoop-deps": "deps/hadoop"
     },
     package_data={
-        "pypaimon.jars": ["*.jar"]
+        "pypaimon.jars": ["*.jar"],
+        "pypaimon.hadoop-deps": ["*.jar"]
     },
     install_requires=install_requires,
     description='Apache Paimon Python API',
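
Because pypaimon.hadoop-deps is declared both in packages and in package_data, the shaded jar generated into deps/hadoop ships inside the built distribution and can be located at runtime without a local Hadoop installation. A hypothetical post-install check, assuming Python 3.9+ for importlib.resources.files:

# Hypothetical: list the bundled Hadoop jars of an installed pypaimon.
import importlib.resources

jars = [p.name for p in importlib.resources.files('pypaimon.hadoop-deps').iterdir()
        if p.name.endswith('.jar')]
print(jars)  # expected to contain something like 'hadoop-deps-3.3.4.jar'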

tools/releasing/create_source_release.sh

Lines changed: 21 additions & 6 deletions
@@ -55,11 +55,13 @@ fi
 
 ###########################
 
-# prepare bridge jar
-
-DEPS_DIR=${PROJECT_ROOT}/deps/jars
+DEPS_DIR=${PROJECT_ROOT}/deps
 rm -rf ${DEPS_DIR}
-mkdir -p ${DEPS_DIR}
+
+# prepare bridge jar
+BRIDGE_DEPS_DIR=${DEPS_DIR}/jars
+mkdir -p ${BRIDGE_DEPS_DIR}
+touch ${BRIDGE_DEPS_DIR}/__init__.py
 
 cd ${PROJECT_ROOT}/paimon-python-java-bridge
 
@@ -70,10 +72,23 @@ if grep -q "<version>.*SNAPSHOT</version>" "pom.xml"; then
 fi
 
 # get bridge jar version
-JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+BRIDGE_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
+
+mvn clean install -DskipTests
+cp "target/paimon-python-java-bridge-${BRIDGE_JAR_VERSION}.jar" ${BRIDGE_DEPS_DIR}
+
+# prepare hadoop-deps jar
+HADOOP_DEPS_DIR=${DEPS_DIR}/hadoop
+mkdir -p ${HADOOP_DEPS_DIR}
+touch ${HADOOP_DEPS_DIR}/__init__.py
+
+cd ${PROJECT_ROOT}/hadoop-deps
+
+# get hadoop-deps jar version
+HADOOP_JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1)
 
 mvn clean install -DskipTests
-cp "target/paimon-python-java-bridge-${JAR_VERSION}.jar" ${DEPS_DIR}
+cp "target/hadoop-deps-${HADOOP_JAR_VERSION}.jar" ${HADOOP_DEPS_DIR}
 
 cd ${CURR_DIR}
