Skip to content

Commit 9096db4

Browse files
committed
Added basic functionality for read and write to HUAWEI Object Storage Service (OBS)
1 parent 5a82613 commit 9096db4

File tree

9 files changed

+819
-16
lines changed

9 files changed

+819
-16
lines changed

README.rst

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ Other examples of URLs that ``smart_open`` accepts::
9393
s3://my_key:my_secret@my_server:my_port@my_bucket/my_key
9494
gs://my_bucket/my_blob
9595
azure://my_bucket/my_blob
96+
obs://bucket_id.server:port/object_key
9697
hdfs:///path/file
9798
hdfs://path/file
9899
webhdfs://host:port/path/file
@@ -290,6 +291,7 @@ Transport-specific Options
290291
- WebHDFS
291292
- GCS
292293
- Azure Blob Storage
294+
- OBS (Huawei Object Storage)
293295

294296
Each option involves setting up its own set of parameters.
295297
For example, for accessing S3, you often need to set up authentication, like API keys or a profile name.
@@ -455,6 +457,55 @@ Additional keyword arguments can be propagated to the ``commit_block_list`` meth
455457
kwargs = {'metadata': {'version': 2}}
456458
fout = open('azure://container/key', 'wb', transport_params={'blob_kwargs': kwargs})
457459
460+
OBS Credentials
461+
---------------
462+
``smart_open`` uses the ``esdk-obs-python`` library to talk to OBS.
463+
Please see `esdk-obs-python docs <https://support.huaweicloud.com/intl/en-us/sdk-python-devg-obs/obs_22_0500.html>`__.
464+
465+
There are several ways to provide Access key, Secret Key and Security Token
466+
467+
- Using env variables
468+
- Using custom client params
469+
470+
AK, SK, ST can be encrypted in this case You need install and configure `security provider <https://support.huawei.com/enterprise/en/software/260510077-ESW2000847337>`__.
471+
472+
473+
OBS Advanced Usage
474+
--------------------
475+
- Supported env variables:
476+
477+
OBS_ACCESS_KEY_ID,
478+
OBS_SECRET_ACCESS_KEY,
479+
OBS_SECURITY_TOKEN,
480+
SMART_OPEN_OBS_USE_CLIENT_WRITE_MODE,
481+
SMART_OPEN_OBS_DECRYPT_AK_SK,
482+
SMART_OPEN_OBS_SCC_LIB_PATH,
483+
SMART_OPEN_OBS_SCC_CONF_PATH
484+
485+
- Configuration via code
486+
.. code-block:: python
487+
488+
client = {'access_key_id': 'ak', 'secret_access_key': 'sk', 'security_token': 'st', 'server': 'server_url'}
489+
headers = obs.PutObjectHeader(contentType='text/plain')
490+
transport_params = {
491+
>>> # client can be dict with parameters supported by the obs.ObsClient or instance of the obs.ObsClient
492+
>>> 'client': client,
493+
>>> # additional header for request, please see esdk-obs-python docs
494+
>>> 'headers': headers,
495+
>>> # if True obs.ObsClient will be take write method argument as readable object to get bytes. For writing mode only.
496+
>>> # Please see docs for ObsClient.putContent api.
497+
>>> 'use_obs_client_write_mode': True,
498+
>>> # True if need decrypt Ak, Sk, St
499+
>>> # It required to install CryptoAPI libs.
500+
>>> # https://support.huawei.com/enterprise/en/software/260510077-ESW2000847337
501+
>>> 'decrypt_ak_sk' : True,
502+
>>> # path to python libs of the Crypto provider
503+
>>> 'scc_lib_path': '/usr/lib/scc',
504+
>>> # path to config file of the Crypto provider
505+
>>> 'scc_conf_path': '/home/user/scc.conf'}
506+
507+
fout = open('obs://bucket_id.server:port/object_key', 'wb', transport_params=transport_params)
508+
458509
Drop-in replacement of ``pathlib.Path.open``
459510
--------------------------------------------
460511

integration-tests/test_obs.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# -*- coding: utf-8 -*-
2+
import io
3+
import os
4+
5+
import obs
6+
from obs import ObsClient
7+
8+
import smart_open
9+
from smart_open.obs import parse_uri
10+
11+
_OBS_URL = os.environ.get('SO_OBS_URL')
12+
13+
assert _OBS_URL is not None, 'please set the SO_OBS_URL environment variable'
14+
15+
assert os.environ.get('OBS_ACCESS_KEY_ID') is not None, \
16+
'please set the OBS_ACCESS_KEY_ID environment variable'
17+
assert os.environ.get('OBS_SECRET_ACCESS_KEY') is not None, \
18+
'please set the OBS_SECRET_ACCESS_KEY environment variable'
19+
20+
21+
def _clear_bucket(obs_client: obs.ObsClient, bucket_id: str):
22+
objects = obs_client.listObjects(bucketName=bucket_id)
23+
for content in objects.body.contents:
24+
print(content.get('key'))
25+
_delete_object(obs_client=obs_client,
26+
bucket_id=bucket_id,
27+
object_key=content.get('key'))
28+
29+
30+
def _delete_object(obs_client: obs.ObsClient, bucket_id: str, object_key: str):
31+
try:
32+
resp = obs_client.deleteObject(bucketName=bucket_id, objectKey=object_key)
33+
if resp.status < 300:
34+
print('requestId:', resp.requestId)
35+
print('deleteMarker:', resp.body.deleteMarker)
36+
except Exception as ex:
37+
print(ex)
38+
39+
40+
def initialize_bucket():
41+
parsed = parse_uri(_OBS_URL)
42+
server = f'https://{parsed.get("server")}'
43+
bucket_id = parsed.get('bucket_id')
44+
obs_client = ObsClient(server=server, security_provider_policy='ENV')
45+
_clear_bucket(obs_client=obs_client, bucket_id=bucket_id)
46+
47+
48+
def write_read(key, content, write_mode, read_mode, **kwargs):
49+
with smart_open.open(key, write_mode, **kwargs) as fout:
50+
fout.write(content)
51+
with smart_open.open(key, read_mode, **kwargs) as fin:
52+
return fin.read()
53+
54+
55+
def read_length_prefixed_messages(key, read_mode, **kwargs):
56+
result = io.BytesIO()
57+
58+
with smart_open.open(key, read_mode, **kwargs) as fin:
59+
length_byte = fin.read(1)
60+
while len(length_byte):
61+
result.write(length_byte)
62+
msg = fin.read(ord(length_byte))
63+
result.write(msg)
64+
length_byte = fin.read(1)
65+
return result.getvalue()
66+
67+
68+
def test_obs_readwrite_binary(benchmark):
69+
initialize_bucket()
70+
71+
key = _OBS_URL + '/sanity.txt'
72+
binary = 'с гранатою в кармане, с чекою в руке'.encode()
73+
actual = benchmark(write_read, key, binary, 'wb', 'rb')
74+
assert actual == binary
75+
76+
77+
def test_obs_readwrite_binary_gzip(benchmark):
78+
initialize_bucket()
79+
80+
key = _OBS_URL + '/sanity.txt.gz'
81+
binary = 'не чайки здесь запели на знакомом языке'.encode()
82+
actual = benchmark(write_read, key, binary, 'wb', 'rb')
83+
assert actual == binary
84+
85+
86+
def test_obs_performance(benchmark):
87+
initialize_bucket()
88+
89+
one_megabyte = io.BytesIO()
90+
for _ in range(1024 * 128):
91+
one_megabyte.write(b'01234567')
92+
one_megabyte = one_megabyte.getvalue()
93+
94+
key = _OBS_URL + '/performance.txt'
95+
actual = benchmark(write_read, key, one_megabyte, 'wb', 'rb')
96+
assert actual == one_megabyte
97+
98+
99+
def test_obs_performance_gz(benchmark):
100+
initialize_bucket()
101+
102+
one_megabyte = io.BytesIO()
103+
for _ in range(1024 * 128):
104+
one_megabyte.write(b'01234567')
105+
one_megabyte = one_megabyte.getvalue()
106+
107+
key = _OBS_URL + '/performance.txt.gz'
108+
actual = benchmark(write_read, key, one_megabyte, 'wb', 'rb')
109+
assert actual == one_megabyte
110+
111+
112+
def test_obs_performance_small_reads(benchmark):
113+
initialize_bucket()
114+
115+
ONE_MIB = 1024 ** 2
116+
one_megabyte_of_msgs = io.BytesIO()
117+
msg = b'\x0f' + b'0123456789abcde' # a length-prefixed "message"
118+
for _ in range(0, ONE_MIB, len(msg)):
119+
one_megabyte_of_msgs.write(msg)
120+
one_megabyte_of_msgs = one_megabyte_of_msgs.getvalue()
121+
122+
key = _OBS_URL + '/many_reads_performance.bin'
123+
124+
with smart_open.open(key, 'wb') as fout:
125+
fout.write(one_megabyte_of_msgs)
126+
127+
actual = benchmark(read_length_prefixed_messages, key, 'rb', buffering=ONE_MIB)
128+
assert actual == one_megabyte_of_msgs

setup.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ def read(fname):
4242
http_deps = ['requests']
4343
ssh_deps = ['paramiko']
4444
zst_deps = ['zstandard']
45+
obs_deps = ['esdk-obs-python']
4546

46-
all_deps = aws_deps + gcs_deps + azure_deps + http_deps + ssh_deps + zst_deps
47+
all_deps = aws_deps + gcs_deps + azure_deps + http_deps + ssh_deps + zst_deps + obs_deps
4748
tests_require = all_deps + [
4849
'moto[server]',
4950
'responses',
@@ -83,6 +84,7 @@ def read(fname):
8384
'webhdfs': http_deps,
8485
'ssh': ssh_deps,
8586
'zst': zst_deps,
87+
'obs': obs_deps,
8688
},
8789
python_requires=">=3.7,<4.0",
8890

0 commit comments

Comments
 (0)