Skip to content

Commit 2214bc4

Browse files
ckunki and tomuben authored
Refactoring/#221 fixed mypy warnings (#225)
* Fixed mypy warnings
* Formatted code
* Made register_peer_connection optional in RegisterPeerForwarder, RegisterPeerForwarderBuilderParameter, RegisterPeerForwarderFactory
* Replaced the comment to ignore import-untyped by an entry in pyproject.toml
* Prefixed properties with "_checked_"
* Updated pandas to version 2.2.3

Co-authored-by: Thomas Ubensee <[email protected]>
1 parent f4fef21 commit 2214bc4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+617
-338
lines changed

doc/changes/changes_0.1.1.md

+1
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ Code name:
1919
* #203: Cleaned-up package names and directory structure
2020
* #217: Rename dataflow abstraction files
2121
* #219: Applied PTB checks and fixes
22+
* #221: Fixed mypy warnings

exasol/analytics/py.typed

Whitespace-only changes.

exasol/analytics/query_handler/context/connection_name.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,6 @@
66
class ConnectionName(DBObjectName):
77
"""A DBObjectName class which represents the name of a connection object"""
88

9-
@typechecked
10-
def __init__(self, connection_name: str):
11-
super().__init__(connection_name.upper())
12-
139

1410
class ConnectionNameImpl(DBObjectNameImpl, ConnectionName):
1511

@@ -19,4 +15,4 @@ def fully_qualified(self) -> str:
1915

2016
@typechecked
2117
def __init__(self, connection_name: str):
22-
super().__init__(connection_name)
18+
super().__init__(connection_name.upper())

exasol/analytics/query_handler/context/proxy/db_object_name_with_schema_proxy.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Generic, TypeVar
1+
from typing import Generic, Optional, TypeVar
22

33
from exasol.analytics.query_handler.context.proxy.db_object_name_proxy import (
44
DBObjectNameProxy,
@@ -15,6 +15,6 @@ def __init__(self, db_object_name_with_schema: NameType, global_counter_value: i
1515
super().__init__(db_object_name_with_schema, global_counter_value)
1616

1717
@property
18-
def schema_name(self) -> SchemaName:
18+
def schema_name(self) -> Optional[SchemaName]:
1919
self._check_if_released()
2020
return self._db_object_name.schema_name

exasol/analytics/query_handler/context/scope.py

+1
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,6 @@ def transfer_object_to(
6262
"""
6363
pass
6464

65+
@abstractmethod
6566
def get_connection(self, name: str) -> Connection:
6667
pass

exasol/analytics/query_handler/deployment/aaf_exasol_lua_script_generator.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def get_aaf_query_loop_lua_script_generator() -> ExasolLuaScriptGenerator:
3232
]
3333
jinja_template_location = JinjaTemplateLocation(
3434
package_name=constants.BASE_PACKAGE,
35-
package_path=constants.TEMPLATES_DIR,
35+
package_path=str(constants.TEMPLATES_DIR),
3636
template_file_name=constants.LUA_SCRIPT_TEMPLATE,
3737
)
3838
generator = ExasolLuaScriptGenerator(

exasol/analytics/query_handler/deployment/lua_script_bundle.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,19 @@ def __init__(
2626

2727
def bundle_lua_scripts(self, output_buffer: IO):
2828
with tempfile.TemporaryDirectory() as tmp_dir:
29-
tmp_dir = Path(tmp_dir)
30-
self.copy_lua_source_files(tmp_dir)
31-
self.run_lua_amlg(tmp_dir, output_buffer)
29+
tmp_path = Path(tmp_dir)
30+
self.copy_lua_source_files(tmp_path)
31+
self.run_lua_amlg(tmp_path, output_buffer)
3232

3333
def copy_lua_source_files(self, tmp_dir: Path):
3434
for src in self.lua_source_files + [self.lua_main_file]:
3535
dst = tmp_dir / src.name
3636
logger.debug(f"Copy {src} to {tmp_dir}")
37-
shutil.copy(src, dst)
37+
shutil.copy(str(src), dst)
3838

3939
def run_lua_amlg(self, tmp_dir: Path, output_buffer: IO):
4040
output_file = tmp_dir / f"bundle_{time.time()}.lua"
4141
bash_command = "amalg.lua -o {out_path} -s {main_file} {modules}".format(
42-
tmp_dir=tmp_dir,
4342
out_path=output_file,
4443
main_file=self.lua_main_file.name,
4544
modules=" ".join(self.lua_modules),

exasol/analytics/query_handler/deployment/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def get_password(pwd: str, user: str, env_var: str, descr: str) -> str:
2424

2525
def load_and_render_statement(template_name, **kwargs) -> str:
2626
env = Environment(
27-
loader=PackageLoader(constants.BASE_PACKAGE, constants.TEMPLATES_DIR),
27+
loader=PackageLoader(constants.BASE_PACKAGE, str(constants.TEMPLATES_DIR)),
2828
autoescape=select_autoescape(),
2929
)
3030
template = env.get_template(template_name)

exasol/analytics/query_handler/graph/result.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,14 @@ def __new__(cls, name: str, bases: Tuple[Type, ...], attrs: Any):
7979
"""
8080
result_type = type(name, bases, attrs)
8181

82-
def _configured_new(cls: Type[cls]):
82+
def _configured_new(cls: Type[_T]):
8383
"""This function is called for subclasses of classes that declare _Meta as their metaclass."""
8484
return _new(cls, result_type)
8585

86-
result_type.__new__ = _configured_new
87-
result_type.__init__ = _init
88-
result_type.__setattr__ = _setattr
89-
result_type.__delattr__ = _delattr
86+
result_type.__new__ = _configured_new # type: ignore
87+
result_type.__init__ = _init # type: ignore
88+
result_type.__setattr__ = _setattr # type: ignore
89+
result_type.__delattr__ = _delattr # type: ignore
9090
return result_type
9191

9292

Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import dataclasses
22

3-
from exasol_bucketfs_utils_python.abstract_bucketfs_location import (
4-
AbstractBucketFSLocation,
5-
)
3+
import exasol.bucketfs as bfs
64

75
from exasol.analytics.query_handler.graph.stage.sql.input_output import (
86
SQLStageInputOutput,
@@ -13,5 +11,5 @@
1311
@dataclasses.dataclass(frozen=True, eq=True)
1412
class SQLStageGraphExecutionInput:
1513
input: SQLStageInputOutput
16-
result_bucketfs_location: AbstractBucketFSLocation
14+
result_bucketfs_location: bfs.path.PathLike
1715
sql_stage_graph: SQLStageGraph

exasol/analytics/query_handler/graph/stage/sql/execution/query_handler_state.py

+32-16
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
)
2222
from exasol.analytics.query_handler.query_handler import QueryHandler
2323
from exasol.analytics.query_handler.result import Continue, Finish
24+
from exasol.analytics.utils.errors import UninitializedAttributeError
2425

2526

2627
class ResultHandlerReturnValue(enum.Enum):
@@ -55,23 +56,38 @@ def __init__(
5556
self._current_query_handler: Optional[
5657
QueryHandler[List[SQLStageInputOutput], SQLStageInputOutput]
5758
] = None
58-
self._current_query_handler_context: Optional[ScopeQueryHandlerContext] = None
59+
self._current_qh_context: Optional[ScopeQueryHandlerContext] = None
5960
self._create_current_query_handler()
6061

61-
def _check_is_valid(self):
62-
if self._current_query_handler is None:
63-
raise RuntimeError("No current query handler set.")
64-
6562
def get_current_query_handler(
6663
self,
6764
) -> QueryHandler[List[SQLStageInputOutput], SQLStageInputOutput]:
68-
self._check_is_valid()
69-
return self._current_query_handler
65+
value = self._current_query_handler
66+
if value is None:
67+
raise RuntimeError("No current query handler set.")
68+
return value
69+
70+
@property
71+
def _checked_current_qh_context(self) -> ScopeQueryHandlerContext:
72+
value = self._current_qh_context
73+
if value is None:
74+
raise UninitializedAttributeError(
75+
"Current query handler context is undefined."
76+
)
77+
return value
78+
79+
@property
80+
def _checked_current_stage(self) -> SQLStage:
81+
value = self._current_stage
82+
if value is None:
83+
raise UninitializedAttributeError("Current stage is None.")
84+
return value
7085

7186
def handle_result(
7287
self, result: Union[Continue, Finish[SQLStageInputOutput]]
7388
) -> ResultHandlerReturnValue:
74-
self._check_is_valid()
89+
# check if current query handler is set
90+
self.get_current_query_handler()
7591
if isinstance(result, Finish):
7692
return self._handle_finished_result(result)
7793
elif isinstance(result, Continue):
@@ -90,7 +106,7 @@ def _handle_finished_result(
90106
return self._try_to_move_to_next_stage()
91107

92108
def _try_to_move_to_next_stage(self) -> ResultHandlerReturnValue:
93-
self._current_query_handler_context.release()
109+
self._checked_current_qh_context.release()
94110
if self._is_not_last_stage():
95111
self._move_to_next_stage()
96112
return ResultHandlerReturnValue.CONTINUE_PROCESSING
@@ -101,7 +117,7 @@ def _try_to_move_to_next_stage(self) -> ResultHandlerReturnValue:
101117
def invalidate(self):
102118
self._current_stage = None
103119
self._current_query_handler = None
104-
self._current_query_handler_context = None
120+
self._current_qh_context = None
105121

106122
def _is_not_last_stage(self):
107123
return self._current_stage_index < len(self._stages_in_execution_order) - 1
@@ -113,7 +129,7 @@ def _move_to_next_stage(self):
113129

114130
def _create_current_query_handler(self):
115131
stage_inputs = self._stage_inputs_map[self._current_stage]
116-
self._current_query_handler_context = (
132+
self._current_qh_context = (
117133
self._query_handler_context.get_child_query_handler_context()
118134
)
119135
result_bucketfs_location = self._result_bucketfs_location.joinpath(
@@ -123,12 +139,12 @@ def _create_current_query_handler(self):
123139
result_bucketfs_location=result_bucketfs_location,
124140
sql_stage_inputs=stage_inputs,
125141
)
126-
self._current_query_handler = self._current_stage.create_train_query_handler(
127-
stage_input, self._current_query_handler_context
142+
self._current_query_handler = self._checked_current_stage.create_train_query_handler(
143+
stage_input, self._current_qh_context
128144
)
129145

130146
def _add_result_to_successors(self, result: SQLStageInputOutput):
131-
successors = self._sql_stage_graph.successors(self._current_stage)
147+
successors = self._sql_stage_graph.successors(self._checked_current_stage)
132148
if len(successors) == 0:
133149
raise RuntimeError("Programming error")
134150
self._add_result_to_inputs_of_successors(result, successors)
@@ -146,7 +162,7 @@ def _add_result_to_reference_counting_bag(
146162
object_proxies = find_object_proxies(result)
147163
for object_proxy in object_proxies:
148164
if object_proxy not in self._reference_counting_bag:
149-
self._current_query_handler_context.transfer_object_to(
165+
self._checked_current_qh_context.transfer_object_to(
150166
object_proxy, self._query_handler_context
151167
)
152168
for _ in successors:
@@ -160,7 +176,7 @@ def _transfer_ownership_of_result_to_query_result_handler(self, result):
160176
object_proxy
161177
)
162178
else:
163-
self._current_query_handler_context.transfer_object_to(
179+
self._checked_current_qh_context.transfer_object_to(
164180
object_proxy, self._query_handler_context
165181
)
166182

exasol/analytics/query_handler/python_query_handler_runner.py

+14-9
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from exasol.analytics.query_handler.result import Continue, Finish
1818
from exasol.analytics.query_handler.udf.runner.state import QueryHandlerRunnerState
1919
from exasol.analytics.sql_executor.interface import SQLExecutor
20+
from exasol.analytics.utils.errors import UninitializedAttributeError
2021

2122
LOGGER = logging.getLogger(__file__)
2223

@@ -72,8 +73,8 @@ def _handle_continue(self, result: Continue) -> Union[Continue, Finish[ResultTyp
7273
self._cleanup_query_handler_context()
7374
self._execute_queries(result.query_list)
7475
input_query_result = self._run_input_query(result)
75-
result = self._state.query_handler.handle_query_result(input_query_result)
76-
return result
76+
_result = self._state.query_handler.handle_query_result(input_query_result)
77+
return _result
7778

7879
def _run_input_query(self, result: Continue) -> PythonQueryResult:
7980
input_query_view, input_query = self._wrap_return_query(result.input_query)
@@ -116,24 +117,28 @@ def _release_and_create_query_handler_context_of_input_query(self):
116117
def _wrap_return_query(
117118
self, input_query: SelectQueryWithColumnDefinition
118119
) -> Tuple[str, str]:
120+
if self._state.input_query_query_handler_context is None:
121+
raise UninitializedAttributeError(
122+
"Current state's input query query handler context is not set."
123+
)
119124
temporary_view_name = (
120125
self._state.input_query_query_handler_context.get_temporary_view_name()
121126
)
122127
input_query_create_view_string = cleandoc(
123128
f"""
124-
CREATE OR REPLACE VIEW {temporary_view_name.fully_qualified} AS
125-
{input_query.query_string};
126-
"""
129+
CREATE OR REPLACE VIEW {temporary_view_name.fully_qualified} AS
130+
{input_query.query_string};
131+
"""
127132
)
128133
full_qualified_columns = [
129134
col.name.fully_qualified for col in input_query.output_columns
130135
]
131136
columns_str = ",\n".join(full_qualified_columns)
132137
input_query_string = cleandoc(
133138
f"""
134-
SELECT
135-
{textwrap.indent(columns_str, " " * 4)}
136-
FROM {temporary_view_name.fully_qualified};
137-
"""
139+
SELECT
140+
{textwrap.indent(columns_str, " " * 4)}
141+
FROM {temporary_view_name.fully_qualified};
142+
"""
138143
)
139144
return input_query_create_view_string, input_query_string

exasol/analytics/query_handler/query/drop/view.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@ def query_string(self) -> str:
1212
return f"DROP VIEW IF EXISTS {self._view_name.fully_qualified};"
1313

1414
@property
15-
def view_name(self) -> TableName:
15+
def view_name(self) -> ViewName:
1616
return self._view_name

exasol/analytics/query_handler/query/result/python_query_result.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,20 @@ def __init__(self, data: List[Tuple[Any, ...]], columns: List[Column]):
4747
}
4848
self._next()
4949

50+
def _range(self, num_rows: Union[int, str]) -> range:
51+
if isinstance(num_rows, int):
52+
return range(num_rows - 1)
53+
if num_rows == "all":
54+
return range(len(self._data) - 1)
55+
raise ValueError(f'num_rows must be an int or str "all" but is {num_rows}')
56+
5057
def fetch_as_dataframe(
5158
self, num_rows: Union[int, str], start_col=0
5259
) -> Optional[pd.DataFrame]:
5360
batch_list = []
54-
if num_rows == "all":
55-
num_rows = len(self._data)
5661
if self._current_row is not None:
5762
batch_list.append(self._current_row)
58-
for i in range(num_rows - 1):
63+
for i in self._range(num_rows):
5964
self._next()
6065
if self._current_row is not None:
6166
batch_list.append(self._current_row)

exasol/analytics/query_handler/query/result/udf_query_result.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,19 @@
11
import collections
2-
from typing import Any, Iterator, List, OrderedDict, Union
2+
from typing import Any, Iterator, List, Optional, OrderedDict, TYPE_CHECKING, Union
33

44
from exasol.analytics.query_handler.query.result.interface import QueryResult, Row
55
from exasol.analytics.schema.column import Column
66
from exasol.analytics.schema.column_name import ColumnName
77
from exasol.analytics.schema.column_type import ColumnType
88

9+
if TYPE_CHECKING:
10+
# Importing pandas might take several seconds. At runtime pandas is
11+
# imported on-demand by UDF context.
12+
#
13+
# This file only imports pandas for type checking, see also
14+
# https://legacy.python.org/dev/peps/pep-0484/#runtime-or-type-checking
15+
import pandas
16+
917

1018
class UDFQueryResult(QueryResult):
1119

@@ -51,7 +59,10 @@ def rowcount(self) -> int:
5159

5260
def fetch_as_dataframe(
5361
self, num_rows: Union[str, int], start_col: int = 0
54-
) -> "pandas.DataFrame":
62+
) -> Optional["pandas.DataFrame"]:
63+
# This place intentionally uses a forward reference, to avoid
64+
# importing pandas which might take several seconds, see comment at
65+
# the beginning of the file.
5566
df = self._ctx.get_dataframe(num_rows, start_col=self._start_col)
5667
self._initialized = True
5768
if df is None:

exasol/analytics/query_handler/udf/runner/call_udf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from exasol.analytics.query_handler.udf.runner.udf import QueryHandlerRunnerUDF
22

3-
udf = QueryHandlerRunnerUDF(exa)
3+
udf = QueryHandlerRunnerUDF(exa) # type: ignore
44

55

66
def run(ctx):

exasol/analytics/query_handler/udf/runner/state.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@
1414
class QueryHandlerRunnerState:
1515
top_level_query_handler_context: TopLevelQueryHandlerContext
1616
query_handler: QueryHandler
17-
connection_lookup: UDFConnectionLookup
17+
connection_lookup: Optional[UDFConnectionLookup] = None
1818
input_query_query_handler_context: Optional[ScopeQueryHandlerContext] = None
1919
input_query_output_columns: Optional[List[Column]] = None

0 commit comments

Comments
 (0)