Skip to content

Commit 5fef17a

Browse files
claude[bot]drgroot
andcommitted
fix: improve type safety and address security concerns in delta.py
- Replace string concatenation with proper value escaping to prevent injection issues - Add _escape_value method for safe predicate building - Standardize type annotations to use Optional instead of | None syntax - Fix all typing issues for deltalake 1.1.3 compatibility Co-authored-by: Yusuf Ali <[email protected]>
1 parent 54b9c67 commit 5fef17a

File tree

1 file changed

+34
-15
lines changed

1 file changed

+34
-15
lines changed

servc/svc/com/storage/delta.py

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def optimize(self):
7878
table.cleanup_metadata()
7979
table.create_checkpoint()
8080

81-
def getPartitions(self) -> Dict[str, List[Any]] | None:
81+
def getPartitions(self) -> Optional[Dict[str, List[Any]]]:
8282
table = self.getConn()
8383

8484
partitions: Dict[str, List[Any]] = {}
@@ -91,11 +91,11 @@ def getPartitions(self) -> Dict[str, List[Any]] | None:
9191

9292
return partitions
9393

94-
def getCurrentVersion(self) -> str | None:
94+
def getCurrentVersion(self) -> Optional[str]:
9595
table = self.getConn()
9696
return str(table.get_version())
9797

98-
def getVersions(self) -> List[str] | None:
98+
def getVersions(self) -> Optional[List[str]]:
9999
return [str(self.getCurrentVersion())]
100100

101101
def insert(self, data: List[Any]) -> bool:
@@ -108,10 +108,29 @@ def insert(self, data: List[Any]) -> bool:
108108
)
109109
return True
110110

111+
def _escape_value(self, val: Any) -> str:
112+
"""Properly escape values for predicate building to prevent injection issues."""
113+
if isinstance(val, str):
114+
# Escape single quotes in strings and wrap in quotes
115+
return f"'{val.replace(\"'\", \"''\")}"
116+
elif isinstance(val, (int, float)):
117+
return str(val)
118+
elif isinstance(val, bool):
119+
return str(val).lower()
120+
elif val is None:
121+
return "null"
122+
elif isinstance(val, list):
123+
# Handle list values for 'in' operations
124+
escaped_items = [self._escape_value(item) for item in val]
125+
return f"({', '.join(escaped_items)})"
126+
else:
127+
# Fallback to string representation with quotes
128+
return f"'{str(val).replace(\"'\", \"''\")}"
129+
111130
def _filters(
112131
self,
113-
partitions: Dict[str, List[Any]] | None = None,
114-
) -> List[Tuple[str, str, Any]] | None:
132+
partitions: Optional[Dict[str, List[Any]]] = None,
133+
) -> Optional[List[Tuple[str, str, Any]]]:
115134
filters: List[Tuple[str, str, Any]] = []
116135
if partitions is None:
117136
return None
@@ -123,14 +142,14 @@ def _filters(
123142
return filters if len(filters) > 0 else None
124143

125144
def overwrite(
126-
self, data: List[Any], partitions: Dict[str, List[Any]] | None = None
145+
self, data: List[Any], partitions: Optional[Dict[str, List[Any]]] = None
127146
) -> bool:
128147
table = self.getConn()
129148

130-
predicate: str | None = None
149+
predicate: Optional[str] = None
131150
filter = self._filters(partitions)
132151
if filter is not None:
133-
predicate = " & ".join([f"{col} {op} {str(val)}" for col, op, val in filter])
152+
predicate = " & ".join([f"{col} {op} {self._escape_value(val)}" for col, op, val in filter])
134153

135154
write_deltalake(
136155
table,
@@ -144,9 +163,9 @@ def overwrite(
144163
def readRaw(
145164
self,
146165
columns: List[str],
147-
partitions: Dict[str, List[Any]] | None = None,
148-
version: str | None = None,
149-
options: Any | None = None,
166+
partitions: Optional[Dict[str, List[Any]]] = None,
167+
version: Optional[str] = None,
168+
options: Optional[Any] = None,
150169
) -> Table:
151170
table = self.getConn()
152171
if version is not None:
@@ -172,13 +191,13 @@ def readRaw(
172191
def read(
173192
self,
174193
columns: List[str],
175-
partitions: Dict[str, List[Any]] | None = None,
176-
version: str | None = None,
177-
options: Any | None = None,
194+
partitions: Optional[Dict[str, List[Any]]] = None,
195+
version: Optional[str] = None,
196+
options: Optional[Any] = None,
178197
) -> Table:
179198
return self.readRaw(columns, partitions, version, options)
180199

181-
def getSchema(self) -> Schema | None:
200+
def getSchema(self) -> Optional[Schema]:
182201
table = self.getConn()
183202

184203
return table.schema.to_pyarrow()

0 commit comments

Comments
 (0)