Skip to content

Commit 63f5314

Browse files
authored
Merge pull request #490 from atlanhq/APP-5025
APP-5025: Added support for `Oracle` workflow package
2 parents 26ff3fc + 8a71f1e commit 63f5314

File tree

5 files changed

+549
-0
lines changed

5 files changed

+549
-0
lines changed

pyatlan/model/packages/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from .lineage_builder import LineageBuilder
1313
from .lineage_generator_nt import LineageGenerator
1414
from .mongodb_crawler import MongoDBCrawler
15+
from .oracle_crawler import OracleCrawler
1516
from .postgres_crawler import PostgresCrawler
1617
from .powerbi_crawler import PowerBICrawler
1718
from .relational_assets_builder import RelationalAssetsBuilder
@@ -41,6 +42,7 @@
4142
"AssetImport",
4243
"AssetExportBasic",
4344
"RelationalAssetsBuilder",
45+
"OracleCrawler",
4446
"LineageBuilder",
4547
"LineageGenerator",
4648
]
Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
from __future__ import annotations
2+
3+
from typing import List, Optional
4+
5+
from pyatlan.model.enums import AtlanConnectorType, WorkflowPackage
6+
from pyatlan.model.packages.base.crawler import AbstractCrawler
7+
from pyatlan.model.workflow import WorkflowMetadata
8+
9+
10+
class OracleCrawler(AbstractCrawler):
    """
    Base configuration for a new Oracle crawler.

    Every public configuration method returns the crawler itself, so
    calls can be chained.

    :param connection_name: name for the connection
    :param admin_roles: admin roles for the connection
    :param admin_groups: admin groups for the connection
    :param admin_users: admin users for the connection
    :param allow_query: allow data to be queried in the
        connection (True) or not (False), default: True
    :param allow_query_preview: allow sample data viewing for
        assets in the connection (True) or not (False), default: True
    :param row_limit: maximum number of rows
        that can be returned by a query, default: 10000
    """

    _NAME = "oracle"
    _PACKAGE_NAME = "@atlan/oracle"
    _PACKAGE_PREFIX = WorkflowPackage.ORACLE.value
    _CONNECTOR_TYPE = AtlanConnectorType.ORACLE
    _PACKAGE_ICON = (
        "https://docs.oracle.com/sp_common/book-template/ohc-common/img/favicon.ico"
    )
    _PACKAGE_LOGO = (
        "https://docs.oracle.com/sp_common/book-template/ohc-common/img/favicon.ico"
    )

    def __init__(
        self,
        connection_name: str,
        admin_roles: Optional[List[str]] = None,
        admin_groups: Optional[List[str]] = None,
        admin_users: Optional[List[str]] = None,
        allow_query: bool = True,
        allow_query_preview: bool = True,
        row_limit: int = 10000,
    ):
        # Flipped to True by any advanced option; drives the
        # "advanced-config-strategy" parameter emitted with the metadata.
        self._advanced_config = False
        super().__init__(
            connection_name=connection_name,
            connection_type=self._CONNECTOR_TYPE,
            admin_roles=admin_roles,
            admin_groups=admin_groups,
            admin_users=admin_users,
            allow_query=allow_query,
            allow_query_preview=allow_query_preview,
            row_limit=row_limit,
            source_logo=self._PACKAGE_LOGO,
        )

    def s3(self, bucket_name: str, bucket_prefix: str) -> OracleCrawler:
        """
        Set up the crawler to fetch metadata directly from the S3 bucket.

        :param bucket_name: name of the S3 bucket containing the extracted metadata files
        :param bucket_prefix: prefix within the S3 bucket where the extracted metadata files are located
        :returns: crawler, configured to fetch metadata directly from the S3 bucket
        """
        self._parameters.append(dict(name="extraction-method", value="s3"))
        self._parameters.append(dict(name="metadata-s3-bucket", value=bucket_name))
        self._parameters.append(dict(name="metadata-s3-prefix", value=bucket_prefix))
        # Advanced configuration defaults. Both calls also mark the
        # advanced configuration as customised (see the methods below).
        self.jdbc_internal_methods(enable=True)
        self.source_level_filtering(enable=False)
        return self

    def direct(
        self,
        hostname: str,
        port: int = 1521,
    ) -> OracleCrawler:
        """
        Set up the crawler to extract directly from Oracle.

        :param hostname: hostname of the Oracle instance
        :param port: port number of Oracle instance, defaults to `1521`
        :returns: crawler, set up to extract directly from Oracle.
        """
        local_creds = {
            "name": f"default-{self._NAME}-{self._epoch}-0",
            "host": hostname,
            "port": port,
            "connector_config_name": f"atlan-connectors-{self._NAME}",
        }
        self._credentials_body.update(local_creds)
        self._parameters.append(dict(name="extraction-method", value="direct"))
        return self

    def basic_auth(
        self,
        username: str,
        password: str,
        sid: str,
        database_name: str,
    ) -> OracleCrawler:
        """
        Set up the crawler to use basic authentication.

        :param username: through which to access Oracle
        :param password: through which to access Oracle
        :param sid: SID (system identifier) of the Oracle instance
        :param database_name: database name to crawl
        :returns: crawler, set up to use basic authentication
        """
        local_creds = {
            "name": f"default-{self._NAME}-{self._epoch}-0",
            "auth_type": "basic",
            "username": username,
            "password": password,
            "extra": {"sid": sid, "databaseName": database_name},
        }
        self._credentials_body.update(local_creds)
        return self

    def include(self, assets: dict) -> OracleCrawler:
        """
        Defines the filter for assets to include when crawling.

        :param assets: Map keyed by database name with each value being a list of schemas
        :returns: crawler, set to include only those assets specified
        :raises InvalidRequestException: In the unlikely
            event the provided filter cannot be translated
        """
        include_assets = assets or {}
        to_include = self.build_hierarchical_filter(include_assets)
        # Fall back to an empty JSON object when the filter is empty.
        self._parameters.append(dict(name="include-filter", value=to_include or "{}"))
        return self

    def exclude(self, assets: dict) -> OracleCrawler:
        """
        Defines the filter for assets to exclude when crawling.

        :param assets: Map keyed by database name with each value being a list of schemas
        :returns: crawler, set to exclude only those assets specified
        :raises InvalidRequestException: In the unlikely
            event the provided filter cannot be translated
        """
        exclude_assets = assets or {}
        to_exclude = self.build_hierarchical_filter(exclude_assets)
        # Fall back to an empty JSON object when the filter is empty.
        self._parameters.append(dict(name="exclude-filter", value=to_exclude or "{}"))
        return self

    def exclude_regex(self, regex: str) -> OracleCrawler:
        """
        Defines the exclude regex the crawler uses to ignore
        tables and views based on a naming convention.

        :param regex: exclude regex for the crawler
        :returns: crawler, set to exclude
            only those assets specified in the regex
        """
        self._parameters.append(dict(name="temp-table-regex", value=regex))
        return self

    def jdbc_internal_methods(self, enable: bool) -> OracleCrawler:
        """
        Defines whether to enable or disable JDBC
        internal methods for data extraction.

        :param enable: whether to enable (`True`) or
            disable (`False`) JDBC internal methods for data extraction
        :returns: crawler, with jdbc internal methods for data extraction
        """
        self._advanced_config = True
        self._parameters.append(
            dict(name="use-jdbc-internal-methods", value="true" if enable else "false")
        )
        return self

    def source_level_filtering(self, enable: bool) -> OracleCrawler:
        """
        Defines whether to enable or disable schema level filtering on source.
        Schemas selected in the include filter will be fetched.

        :param enable: whether to enable (`True`) or
            disable (`False`) schema level filtering on source
        :returns: crawler, with schema level filtering on source
        """
        self._advanced_config = True
        self._parameters.append(
            dict(
                name="use-source-schema-filtering", value="true" if enable else "false"
            )
        )
        return self

    def _set_required_metadata_params(self) -> None:
        """
        Append the parameters every Oracle workflow carries: credential
        lookup, publish mode, auth type, advanced-config strategy and the
        serialized connection.

        Each call appends again, so it is invoked once, from `_get_metadata`.
        """
        self._parameters.append(
            {"name": "credentials-fetch-strategy", "value": "credential_guid"}
        )
        self._parameters.append(
            {"name": "credential-guid", "value": "{{credentialGuid}}"}
        )
        self._parameters.append(dict(name="publish-mode", value="production"))
        self._parameters.append(dict(name="atlas-auth-type", value="internal"))
        self._parameters.append(
            dict(
                name="advanced-config-strategy",
                value="custom" if self._advanced_config else "default",
            )
        )
        self._parameters.append(
            {
                "name": "connection",
                "value": self._get_connection().json(
                    by_alias=True, exclude_unset=True, exclude_none=True
                ),
            }
        )

    def _get_metadata(self) -> WorkflowMetadata:
        """
        Build the workflow metadata (labels, annotations, name and
        namespace) for the Oracle package.

        Also appends the required parameters as a side effect, via
        `_set_required_metadata_params`.

        :returns: metadata describing the Oracle crawler workflow
        """
        self._set_required_metadata_params()
        return WorkflowMetadata(
            labels={
                "orchestration.atlan.com/certified": "true",
                "orchestration.atlan.com/source": self._NAME,
                "orchestration.atlan.com/sourceCategory": "warehouse",
                "orchestration.atlan.com/type": "connector",
                "orchestration.atlan.com/verified": "true",
                "package.argoproj.io/installer": "argopm",
                "package.argoproj.io/name": f"a-t-ratlans-l-a-s-h{self._NAME}",
                "package.argoproj.io/registry": "httpsc-o-l-o-ns-l-a-s-hs-l-a-s-hpackages.atlan.com",
                f"orchestration.atlan.com/default-{self._NAME}-{self._epoch}": "true",
                "orchestration.atlan.com/atlan-ui": "true",
            },
            annotations={
                "orchestration.atlan.com/allowSchedule": "true",
                "orchestration.atlan.com/categories": "warehouse,crawler",
                "orchestration.atlan.com/dependentPackage": "",
                "orchestration.atlan.com/docsUrl": "https://ask.atlan.com/hc/en-us/articles/6849958872861",
                "orchestration.atlan.com/emoji": "\U0001f680",
                "orchestration.atlan.com/icon": self._PACKAGE_ICON,
                "orchestration.atlan.com/logo": self._PACKAGE_LOGO,
                "orchestration.atlan.com/marketplaceLink": f"https://packages.atlan.com/-/web/detail/{self._PACKAGE_NAME}",  # noqa
                "orchestration.atlan.com/name": "Oracle Assets",
                "package.argoproj.io/author": "Atlan",
                "package.argoproj.io/description": "Package to crawl Oracle assets and publish to Atlan for discovery",
                "package.argoproj.io/homepage": f"https://packages.atlan.com/-/web/detail/{self._PACKAGE_NAME}",
                "package.argoproj.io/keywords": "[\"oracle\",\"warehouse\",\"connector\",\"crawler\"]",  # fmt: skip
                "package.argoproj.io/name": self._PACKAGE_NAME,
                "package.argoproj.io/registry": "https://packages.atlan.com",
                "package.argoproj.io/repository": "git+https://github.com/atlanhq/marketplace-packages.git",
                # NOTE(review): replaces an obfuscated "[email protected]"
                # placeholder; confirm this is the intended support address.
                "package.argoproj.io/support": "support@atlanhq.com",
                "orchestration.atlan.com/atlanName": f"{self._PACKAGE_PREFIX}-default-{self._NAME}-{self._epoch}",
            },
            name=f"{self._PACKAGE_PREFIX}-{self._epoch}",
            namespace="default",
        )

0 commit comments

Comments
 (0)