Skip to content
This repository was archived by the owner on Sep 26, 2022. It is now read-only.

Commit 3888941

Browse files
#72: Added method generate_bucket_udf_path (#73)
* Added method for generating the UDF path
* Added tests
* Added test with DB
* Updated test
* Reorganized tests
* Updated changelog
1 parent a0455eb commit 3888941

23 files changed

+189
-52
lines changed

doc/changes/changes_0.3.0.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66

77
## Features / Enhancements
8-
- /
8+
9+
- #72: Added `generate_bucket_udf_path` method to BucketFSLocation
910

1011
## Bug Fixes
1112

exasol_bucketfs_utils_python/abstract_bucketfs_location.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Any, Tuple, IO, Iterable
33
from pathlib import PurePosixPath, Path
44
from urllib.parse import ParseResult
5+
from typing import Union
56

67

78
class AbstractBucketFSLocation(ABC):
@@ -10,6 +11,13 @@ class AbstractBucketFSLocation(ABC):
1011
fileobjects and joblib objects. Also able to read files from the BucketFS
1112
directly, if called from inside a UDF.
1213
"""
14+
15+
@abstractmethod
def generate_bucket_udf_path(
        self,
        path_in_bucket: Union[None, str, PurePosixPath]
) -> PurePosixPath:
    """
    Build the file system path for a path inside the bucket.

    The returned path is meant to be used from inside a UDF to access
    the file directly, without going through HTTP.

    :param path_in_bucket: path of the file inside the bucket, given as
        string or PurePosixPath; None is accepted as well
        (presumably meaning the root of the location - confirm with
        the concrete implementations)
    :return: the generated path as PurePosixPath
    """
20+
1321
@abstractmethod
1422
def download_from_bucketfs_to_string(
1523
self,

exasol_bucketfs_utils_python/bucketfs_location.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
from typing import Any, Tuple, IO, Iterable
1+
from typing import Any, Tuple, IO, Iterable, Union
22
from pathlib import PurePosixPath, Path
33
from urllib.parse import ParseResult
44
from exasol_bucketfs_utils_python import download, upload, list_files, \
5-
delete
5+
delete, bucketfs_utils
66
from exasol_bucketfs_utils_python import load_file_from_local_fs as from_BFS
77
from exasol_bucketfs_utils_python.bucket_config import BucketConfig
88

@@ -25,6 +25,12 @@ def __init__(self, bucket_config: BucketConfig, base_path: PurePosixPath):
2525
self.base_path = base_path
2626
self.bucket_config = bucket_config
2727

28+
def generate_bucket_udf_path(
        self, path_in_bucket: Union[None, str, PurePosixPath]) \
        -> PurePosixPath:
    """
    Generate the path under which a file of this location can be
    accessed from inside a UDF.

    The given path is resolved relative to ``self.base_path`` before it
    is handed to ``bucketfs_utils.generate_bucket_udf_path``, so that a
    location with a non-empty base path does not silently point to the
    root of the bucket.

    :param path_in_bucket: path of the file relative to the base path
        of this location; None refers to the base path itself
    :return: the path generated by
        ``bucketfs_utils.generate_bucket_udf_path`` for the bucket of
        this location
    """
    complete_path = PurePosixPath(self.base_path)
    if path_in_bucket is not None:
        # Joining an absolute path would discard base_path entirely,
        # hence the path is made relative before it is appended.
        complete_path = complete_path / \
            bucketfs_utils.make_path_relative(path_in_bucket)
    return bucketfs_utils.generate_bucket_udf_path(
        self.bucket_config, complete_path)
33+
2834
def get_complete_file_path_in_bucket(
2935
self,
3036
bucket_file_path: str) -> str:

exasol_bucketfs_utils_python/bucketfs_utils.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def _encode_url_part(part: str) -> str:
1414
return urlencoded
1515

1616

17-
def _correct_path_in_bucket_for_archives(path_in_bucket: PurePosixPath) \
17+
def correct_path_in_bucket_for_archives(path_in_bucket: PurePosixPath) \
1818
-> PurePosixPath:
1919
for extension in ARCHIVE_EXTENSIONS:
2020
if path_in_bucket.name.endswith(extension):
@@ -24,7 +24,7 @@ def _correct_path_in_bucket_for_archives(path_in_bucket: PurePosixPath) \
2424
return path_in_bucket
2525

2626

27-
def _make_path_relative(path_in_bucket: Union[None, str, PurePosixPath]) \
27+
def make_path_relative(path_in_bucket: Union[None, str, PurePosixPath]) \
2828
-> PurePosixPath:
2929
path_in_bucket = PurePosixPath(path_in_bucket)
3030
if path_in_bucket.is_absolute():
@@ -62,8 +62,8 @@ def generate_bucket_udf_path(
6262
path = PurePosixPath(bucketfs_path, bucket_config.bucket_name)
6363

6464
if path_in_bucket is not None:
65-
path_in_bucket = _make_path_relative(path_in_bucket)
66-
path_in_bucket = _correct_path_in_bucket_for_archives(path_in_bucket)
65+
path_in_bucket = make_path_relative(path_in_bucket)
66+
path_in_bucket = correct_path_in_bucket_for_archives(path_in_bucket)
6767
else:
6868
path_in_bucket = ""
6969
path = PurePosixPath(path, path_in_bucket)
@@ -120,7 +120,7 @@ def generate_bucket_http_url(
120120
url = generate_bucketfs_http_url(bucket_config.bucketfs_config,
121121
with_credentials)
122122
if path_in_bucket is not None:
123-
path_in_bucket = _make_path_relative(path_in_bucket)
123+
path_in_bucket = make_path_relative(path_in_bucket)
124124
else:
125125
path_in_bucket = ""
126126
encoded_bucket_and_path_in_bucket = \

exasol_bucketfs_utils_python/localfs_mock_bucketfs_location.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
from typing import Any, IO, List
1+
from typing import Any, IO, List, Union
22
from pathlib import PurePosixPath, Path
33
from typing import Any
44
import joblib
5+
6+
from exasol_bucketfs_utils_python import bucketfs_utils
57
from exasol_bucketfs_utils_python.abstract_bucketfs_location import \
68
AbstractBucketFSLocation
79

@@ -20,6 +22,18 @@ def __init__(self, base_path: PurePosixPath):
2022
def get_complete_file_path_in_bucket(self, bucket_file_path) -> str:
    """
    Join the base path of this location with the given file path.

    :param bucket_file_path: path of the file relative to the base path
    :return: the joined path as string
    """
    complete_path = PurePosixPath(self.base_path, bucket_file_path)
    return str(complete_path)
2224

25+
def generate_bucket_udf_path(
        self, path_in_bucket: Union[None, str, PurePosixPath]) \
        -> PurePosixPath:
    """
    Generate the local path for a path inside the mocked bucket.

    The path is made relative with ``bucketfs_utils.make_path_relative``
    and then appended to the base path of this location.

    :param path_in_bucket: path of the file inside the bucket; for None
        the base path itself is returned
    :return: the base path joined with the relative path
    """
    relative_path = (
        ""
        if path_in_bucket is None
        else bucketfs_utils.make_path_relative(path_in_bucket)
    )
    return PurePosixPath(self.base_path, relative_path)
36+
2337
def download_from_bucketfs_to_string(self, bucket_file_path: str) -> str:
2438
with open(self.get_complete_file_path_in_bucket(
2539
bucket_file_path), "rt") as f:

tests/fixtures/prepare_bucket_fixture.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig
55
from exasol_bucketfs_utils_python.bucketfs_connection_config import \
66
BucketFSConnectionConfig
7-
from tests.test_load_fs_file_from_udf import delete_testfile_from_bucketfs
7+
from tests.integration_tests.with_db.test_load_fs_file_from_udf import delete_testfile_from_bucketfs
88

99

1010
@pytest.fixture(scope="module")

tests/integration_tests/__init__.py

Whitespace-only changes.

tests/integration_tests/with_db/__init__.py

Whitespace-only changes.

tests/test_bucketfs_location.py renamed to tests/integration_tests/with_db/test_bucketfs_location.py

Lines changed: 60 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,76 @@
11
from pathlib import PurePosixPath
2-
32
from exasol_bucketfs_utils_python.bucket_config import BucketConfig
43
from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig
54
from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig
6-
75
from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation
86
import pytest
97
import textwrap
10-
from tests.test_load_fs_file_from_udf import delete_testfile_from_bucketfs, upload_testfile_to_bucketfs
8+
from tests.integration_tests.with_db.test_load_fs_file_from_udf import delete_testfile_from_bucketfs, upload_testfile_to_bucketfs
119
# TODO: replace upload_testfile_to_bucketfs once the missing functions in BucketFSLocation are implemented
1210

1311

14-
def test_upload_download_string_from_different_instance():
15-
connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False)
16-
bucketfs_config = BucketFSConfig("bfsdefault", connection_config=connection_config)
17-
bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config)
18-
bucket_base_path = PurePosixPath("test_up_down_str")
19-
bucketfs_location_upload = BucketFSLocation(bucket_config, bucket_base_path)
20-
bucketfs_location_download = BucketFSLocation(bucket_config, bucket_base_path)
21-
bucket_file_path = "test_file.txt"
22-
test_string = "test_string"
23-
bucketfs_location_upload.upload_string_to_bucketfs(bucket_file_path, test_string)
24-
result = bucketfs_location_download.download_from_bucketfs_to_string(bucket_file_path)
25-
assert result == test_string
26-
delete_testfile_from_bucketfs(file_path=str(bucket_base_path) + "/" + bucket_file_path,
27-
bucket_config=bucketfs_location_upload.bucket_config)
28-
29-
30-
class TestValue:
31-
__test__ = False
32-
33-
def __init__(self, value: str):
34-
self.value = value
3512

36-
def __eq__(self, other):
37-
return self.value == self.value
13+
def test_generate_bucket_udf_path_with_db(
14+
upload_language_container, pyexasol_connection):
3815

16+
connection_config = BucketFSConnectionConfig(
17+
host="localhost", port=6666, user="w", pwd="write", is_https=False)
18+
bucketfs_config = BucketFSConfig(
19+
connection_config=connection_config, bucketfs_name="bfsdefault")
20+
bucket_config = BucketConfig(
21+
bucket_name="default", bucketfs_config=bucketfs_config)
22+
bucketfs_location = BucketFSLocation(bucket_config, "")
3923

40-
def test_upload_download_obj_from_different_instance():
41-
connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False)
42-
bucketfs_config = BucketFSConfig("bfsdefault", connection_config=connection_config)
43-
bucket_config = BucketConfig(bucket_name="default", bucketfs_config=bucketfs_config)
44-
bucket_base_path = PurePosixPath("test_up_down_obj")
45-
bucketfs_location_upload = BucketFSLocation(bucket_config, bucket_base_path)
46-
bucketfs_location_download = BucketFSLocation(bucket_config, bucket_base_path)
4724
bucket_file_path = "test_file.txt"
48-
test_value = TestValue("test_string")
49-
bucketfs_location_upload.upload_object_to_bucketfs_via_joblib(test_value, bucket_file_path)
50-
result = bucketfs_location_download.download_object_from_bucketfs_via_joblib(bucket_file_path)
51-
assert result == test_value
52-
delete_testfile_from_bucketfs(file_path=str(bucket_base_path) + "/" + bucket_file_path,
53-
bucket_config=bucketfs_location_upload.bucket_config)
25+
test_string = "test_string"
26+
bucketfs_location.upload_string_to_bucketfs(bucket_file_path, test_string)
27+
28+
target_schema = "TARGET_SCHEMA"
29+
try:
30+
# access file from udf
31+
udf_name = "AccessFileInBucketFSFromUDF"
32+
pyexasol_connection.execute(
33+
f"CREATE SCHEMA IF NOT EXISTS {target_schema};")
34+
pyexasol_connection.execute(
35+
f"OPEN SCHEMA {target_schema};")
36+
udf_sql = textwrap.dedent(f"""
37+
CREATE OR REPLACE PYTHON3_BFSUP SET SCRIPT {target_schema}."{udf_name}"(
38+
"path_in_bucket" VARCHAR(20000))
39+
RETURNS BOOLEAN
40+
AS
41+
from exasol_bucketfs_utils_python.bucket_config import BucketConfig
42+
from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig
43+
from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig
44+
from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation
45+
from pathlib import PurePosixPath, Path
46+
47+
bucket_name = "default"
48+
bucketfs_name = "bfsdefault"
49+
def get_bucket_config():
50+
connection_config = BucketFSConnectionConfig(host="localhost",
51+
port=6666,
52+
user="r", pwd="read",
53+
is_https=False)
54+
bucketfs_config = BucketFSConfig(bucketfs_name, connection_config=connection_config)
55+
return BucketConfig(bucket_name, bucketfs_config)
56+
57+
def run(ctx):
58+
path_in_bucket = ctx.path_in_bucket
59+
bucket_config = get_bucket_config()
60+
bucketfs_location = BucketFSLocation(bucket_config, "")
61+
file_path = bucketfs_location.generate_bucket_udf_path(path_in_bucket)
62+
63+
return Path(file_path).exists()
64+
""")
65+
pyexasol_connection.execute(udf_sql)
66+
result = pyexasol_connection.execute(
67+
f"""select {target_schema}."{udf_name}"('{bucket_file_path}')""").fetchall()
68+
print(result)
69+
assert result[0][0]
70+
finally:
71+
delete_testfile_from_bucketfs(file_path=bucket_file_path,
72+
bucket_config=bucketfs_location.bucket_config)
73+
pyexasol_connection.execute(f"DROP SCHEMA IF EXISTS {target_schema} CASCADE;")
5474

5575

5676
@pytest.mark.usefixtures("upload_language_container",
@@ -228,6 +248,7 @@ def run(ctx):
228248
bucket_config=bucketfs_location_read.bucket_config)
229249
pyexasol_connection.execute(f"DROP SCHEMA IF EXISTS {target_schema} CASCADE;")
230250

251+
231252
def test_read_files_to_fileobj_from_bucketfs_inside_udf(upload_language_container, pyexasol_connection):
232253
connection_config = BucketFSConnectionConfig(host="localhost", port=6666, user="w", pwd="write", is_https=False)
233254
bucketfs_config = BucketFSConfig("bfsdefault", connection_config=connection_config)

0 commit comments

Comments
 (0)