Merge branch 'main' into saumya/update-test-matrix

jahnvi480 · web-flow · commit b11ebce42e02 · 2026-06-01T14:40:39.000+05:30
diff --git a/PyPI_Description.md b/PyPI_Description.md
@@ -35,23 +35,19 @@ PyBind11 provides:
 - Memory-safe bindings
 - Clean and Pythonic API, while performance-critical logic remains in robust, maintainable C++.
  
-## What's new in v1.7.1
+## What's new in v1.8.0
 
 ### Enhancements
 
-- **Platform Support: manylinux_2_28 Build Targets** - Added build targets for RHEL 8 and glibc 2.28 compatible distributions (#548).
-- **Platform Support: macOS universal2 Wheel for Python 3.10** - Now producing a universal2 wheel for Python 3.10 on macOS, enabling native performance on Apple Silicon (#542).
-- **Performance: UTF-16 String Handling via simdutf** - UTF-16 string processing now uses `simdutf` and `std::u16string` for significantly faster encoding/decoding (#526).
-- **Performance: Optimized execute() Hot Path** - `execute()` gains soft reset, prepare caching, and guarded diagnostics for reduced overhead on repeated statement execution (#528).
-- **Documentation: Azure Linux Installation Guide** - Added installation instructions for Azure Linux (#567).
+- **ActiveDirectoryMSI Support for Bulk Copy** - Added `Authentication=ActiveDirectoryMSI` support to bulk copy, enabling both system-assigned and user-assigned managed identity authentication for Azure-hosted services (#573).
+- **Row String-Key Indexing** - Row objects now support accessing values by column name as a string key (e.g., `row["col"]`), in addition to integer index and attribute access. Case-insensitive lookup is supported when the cursor's `lowercase` attribute is enabled (#589).
+- **Bundled ODBC Driver Upgrade** - Updated the bundled Microsoft ODBC Driver for SQL Server from 18.5.1.1 to 18.6.2.1 (#569).
 
 ### Bug Fixes
 
-- **Login Failures Now Raise Correct Exception Type** - Authentication failures previously surfaced as `RuntimeError`; they now raise the appropriate `mssql_python` exception type (#562).
-- **GIL Release Around Blocking ODBC Calls** - The GIL is now released around blocking `SQLSetConnectAttr` calls (#568), ODBC statement/fetch/transaction calls (#541), preventing thread stalls in multi-threaded workloads.
-- **executemany Decimal Sign Change Fix** - Fixed a `RuntimeError` in `executemany` when decimal parameter values change sign between rows (#560).
-- **CP1252 VARCHAR Encoding Consistency** - Fixed inconsistent retrieval of CP1252 encoded data in `VARCHAR` columns between Windows and Linux (#495).
-- **BulkCopy Empty String in NVARCHAR(MAX)/VARCHAR(MAX)** - Fixed `cursor.bulkcopy()` failing with SQL error 40197/4804 when any row contained an empty string `""` in an `NVARCHAR(MAX)` or `VARCHAR(MAX)` column. Fix ships via `mssql_py_core` 0.1.4 (#559).
+- **Deferred Connect-Attribute Use-After-Free** - Fixed a use-after-free in `Connection.setAttribute` for deferred ODBC attributes (e.g., `SQL_COPT_SS_ACCESS_TOKEN`) that caused SIGBUS on macOS arm64 and authentication failures on Windows and Azure SQL (#596).
+- **Connection String Parsed Multiple Times in Auth Path** - Refactored authentication handling to use dictionary-based parameter processing instead of repeated string parsing, improving reliability and performance (#590).
+- **executemany Type Annotation Regression** - Fixed a typing regression where `Cursor.executemany` rejected valid `list[tuple[...]]` arguments under mypy because the parameter was annotated with the invariant `List` type. The parameter type now uses the covariant `Sequence`, matching PEP 249 (#586).
 
 For more information, please visit the project link on Github: https://github.com/microsoft/mssql-python
  
diff --git a/mssql_python/__init__.py b/mssql_python/__init__.py
@@ -14,7 +14,7 @@
 from .helpers import Settings, get_settings, _settings, _settings_lock
 
 # Driver version
-__version__ = "1.7.1"
+__version__ = "1.8.0"
 
 # Exceptions
 # https://www.python.org/dev/peps/pep-0249/#exceptions
diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py
@@ -2022,7 +2022,10 @@ def columns(self, table=None, catalog=None, schema=None, column=None):
         # Use the helper method to prepare the result set
         return self._prepare_metadata_result_set(fallback_description=fallback_description)
 
-    def _transpose_rowwise_to_columnwise(self, seq_of_parameters: list) -> tuple[list, int]:
+    def _transpose_rowwise_to_columnwise(
+        self,
+        seq_of_parameters: Sequence[Sequence[Any]],
+    ) -> tuple[list, int]:
         """
         Convert sequence of rows (row-wise) into list of columns (column-wise),
         for array binding via ODBC. Works with both iterables and generators.
@@ -2140,7 +2143,9 @@ def _compute_column_type(self, column):
         return sample_value, None, None, max_decimal_formatted_len
 
     def executemany(  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
-        self, operation: str, seq_of_parameters: Union[List[Sequence[Any]], List[Mapping[str, Any]]]
+        self,
+        operation: str,
+        seq_of_parameters: Union[Sequence[Sequence[Any]], Sequence[Mapping[str, Any]]],
     ) -> None:
         """
         Prepare a database operation and execute it against all parameter sequences.
diff --git a/mssql_python/mssql_python.pyi b/mssql_python/mssql_python.pyi
@@ -193,7 +193,9 @@ class Cursor:
         reset_cursor: bool = True,
     ) -> "Cursor": ...
     def executemany(
-        self, operation: str, seq_of_parameters: Union[List[Sequence[Any]], List[Mapping[str, Any]]]
+        self,
+        operation: str,
+        seq_of_parameters: Union[Sequence[Sequence[Any]], Sequence[Mapping[str, Any]]],
     ) -> None: ...
     def fetchone(self) -> Optional[Row]: ...
     def fetchmany(self, size: Optional[int] = None) -> List[Row]: ...
diff --git a/setup.py b/setup.py
@@ -176,7 +176,7 @@ def run(self):
 
 setup(
     name="mssql-python",
-    version="1.7.1",
+    version="1.8.0",
     description="A Python library for interacting with Microsoft SQL Server",
     long_description=open("PyPI_Description.md", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
diff --git a/tests/test_009_pooling.py b/tests/test_009_pooling.py
@@ -16,8 +16,50 @@
 """
 
 import pytest
+import os
+import re
+import subprocess
+import sys
+import textwrap
 import time
 import threading
+
+
+def _run_in_subprocess(body: str, conn_str: str) -> None:
+    """Run a test body in a fresh Python process.
+
+    Some tests need to be the *first* to call ``pooling(...)`` in the
+    process (the C++ ``enable_pooling`` is wrapped in ``std::call_once``
+    so only the first call's max_size/idle_timeout take effect). Running
+    them in a subprocess gives each a clean process state.
+
+    The subprocess gets a copy of the current environment in which
+    ``DB_CONNECTION_STRING`` is set to ``conn_str``. ``body`` is dedented
+    and run via ``python -c``; it must exit non-zero on failure (any
+    uncaught assertion is fine). Exit code 77 is reported as a skip.
+    """
+    env = os.environ.copy()
+    env["DB_CONNECTION_STRING"] = conn_str
+    proc = subprocess.run(
+        [sys.executable, "-c", textwrap.dedent(body)],
+        env=env,
+        capture_output=True,
+        text=True,
+        timeout=120,
+    )
+    # Sentinel exit code 77 means the subprocess decided to skip
+    # (e.g. the test prerequisite is unmet on this server, like missing
+    # KILL permission). The reason is printed to stderr.
+    if proc.returncode == 77:
+        pytest.skip(proc.stderr.strip() or "Subprocess requested skip")
+    if proc.returncode != 0:
+        pytest.fail(
+            "Subprocess test body failed\n"
+            f"--- stdout ---\n{proc.stdout}\n"
+            f"--- stderr ---\n{proc.stderr}"
+        )
+
+
 import statistics
 from mssql_python import connect, pooling
 from mssql_python.pooling import PoolingManager
@@ -314,82 +356,182 @@ def test_pool_release_overflow_disconnects_outside_mutex(conn_str):
     conn3.close()
 
 
-@pytest.mark.skip("Flaky test - idle timeout behavior needs investigation")
 def test_pool_idle_timeout_removes_connections(conn_str):
-    """Test that idle_timeout removes connections from the pool after the timeout."""
-    pooling(max_size=2, idle_timeout=1)
-    conn1 = connect(conn_str)
-    spid_list = []
-    cursor1 = conn1.cursor()
-    cursor1.execute("SELECT @@SPID")
-    spid1 = cursor1.fetchone()[0]
-    spid_list.append(spid1)
-    conn1.close()
-
-    # Wait for longer than idle_timeout
-    time.sleep(3)
-
-    # Get a new connection, which should not reuse the previous SPID
-    conn2 = connect(conn_str)
-    cursor2 = conn2.cursor()
-    cursor2.execute("SELECT @@SPID")
-    spid2 = cursor2.fetchone()[0]
-    spid_list.append(spid2)
-    conn2.close()
-
-    assert spid1 != spid2, "Idle timeout did not remove connection from pool"
+    """Test that idle_timeout removes connections from the pool after the timeout.
+
+    Run in a subprocess so this test's pooling(idle_timeout=1) is the
+    first call in the process — the C++ ``enable_pooling`` is wrapped in
+    ``std::call_once``, so only the first call's settings take effect for
+    the lifetime of the process.
+
+    A bare SPID-inequality assertion is unreliable: SQL Server is free to
+    reassign a recently-freed SPID to the next session. So we identify a
+    session by the (SPID, login_time) tuple from sys.dm_exec_sessions —
+    login_time has millisecond resolution and is unique per physical
+    connection.
+    """
+    _run_in_subprocess(
+        """
+        import os, time
+        from mssql_python import connect, pooling
+
+        conn_str = os.environ["DB_CONNECTION_STRING"]
+        pooling(max_size=2, idle_timeout=1)
+
+        def session_identity(conn):
+            cur = conn.cursor()
+            cur.execute(
+                "SELECT @@SPID, "
+                "       (SELECT login_time FROM sys.dm_exec_sessions "
+                "        WHERE session_id = @@SPID)"
+            )
+            spid, login_time = cur.fetchone()
+            return (spid, login_time)
+
+        c1 = connect(conn_str)
+        id1 = session_identity(c1)
+        c1.close()
+
+        time.sleep(3)
+
+        c2 = connect(conn_str)
+        id2 = session_identity(c2)
+        c2.close()
+
+        assert id1 != id2, (
+            f"Idle timeout did not remove connection from pool: "
+            f"got the same session both times {id1}"
+        )
+        """,
+        conn_str,
+    )
 
 
 # =============================================================================
 # Error Handling and Recovery Tests
 # =============================================================================
 
 
-@pytest.mark.skip(
-    "Test causes fatal crash - forcibly closing underlying connection leads to undefined behavior"
-)
 def test_pool_removes_invalid_connections(conn_str):
-    """Test that the pool removes connections that become invalid (simulate by closing underlying connection)."""
-    pooling(max_size=1, idle_timeout=30)
-    conn = connect(conn_str)
-    cursor = conn.cursor()
-    cursor.execute("SELECT 1")
-    # Simulate invalidation by forcibly closing the connection at the driver level
-    try:
-        # Try to access a private attribute or method to forcibly close the underlying connection
-        # This is implementation-specific; if not possible, skip
-        if hasattr(conn, "_conn") and hasattr(conn._conn, "close"):
-            conn._conn.close()
-        else:
-            pytest.skip("Cannot forcibly close underlying connection for this driver")
-    except Exception:
-        pass
-    # Safely close the connection, ignoring errors due to forced invalidation
-    try:
-        conn.close()
-    except RuntimeError as e:
-        if "not initialized" not in str(e):
+    """Pool must replace a pooled connection whose server-side session has died.
+
+    Run in a subprocess so this test does not pollute the in-process pool
+    state for sibling tests (KILL leaves dead pool entries that survive
+    Python-side teardown because the C++ pool config is locked in for the
+    lifetime of the process via ``std::call_once``).
+
+    Simulates the realistic failure mode (DBA KILL, failover, server-side
+    idle timeout) by:
+      1. Opening two connections concurrently (distinct physical sessions)
+         in autocommit mode.
+      2. Using one to KILL the other's server-side session out-of-band.
+      3. Returning both to the pool.
+      4. Re-acquiring repeatedly: every connection must work and the
+         killed SPID must never reappear.
+
+    Only public APIs are used.
+    """
+    _run_in_subprocess(
+        """
+        import os
+        import time
+        from mssql_python import connect, pooling
+
+        conn_str = os.environ["DB_CONNECTION_STRING"]
+        pooling(max_size=2, idle_timeout=30)
+
+        def session_identity(conn):
+            cur = conn.cursor()
+            cur.execute(
+                "SELECT @@SPID, "
+                "       (SELECT login_time FROM sys.dm_exec_sessions "
+                "        WHERE session_id = @@SPID)"
+            )
+            spid, login_time = cur.fetchone()
+            return (spid, login_time)
+
+        # Step 1: two distinct, autocommit connections. Autocommit avoids
+        # the implicit rollback in Connection.close(), which would
+        # otherwise fail on the killed session and leak its pool slot.
+        victim = connect(conn_str)
+        admin = connect(conn_str)
+        victim.autocommit = True
+        admin.autocommit = True
+
+        victim_id = session_identity(victim)
+        admin_id = session_identity(admin)
+        assert victim_id != admin_id, (
+            "Pool handed out the same physical session to two concurrent "
+            "acquires"
+        )
+        victim_spid = victim_id[0]
+
+        # Step 2: admin KILLs the victim's session. Requires server
+        # permission (ALTER ANY CONNECTION or sysadmin); on hosted/CI
+        # databases the test login often lacks it, so skip gracefully.
+        try:
+            admin.cursor().execute(f"KILL {victim_spid}")
+        except Exception as e:
+            msg = str(e)
+            if "permission" in msg.lower() or "KILL" in msg:
+                import sys as _sys
+                print(
+                    f"Skipping: KILL not permitted for this login: {msg}",
+                    file=_sys.stderr,
+                )
+                victim.close()
+                admin.close()
+                _sys.exit(77)
             raise
-    # Now, get a new connection from the pool and ensure it works
-    new_conn = connect(conn_str)
-    new_cursor = new_conn.cursor()
-    try:
-        new_cursor.execute("SELECT 1")
-        result = new_cursor.fetchone()
-        assert result is not None and result[0] == 1, "Pool did not remove invalid connection"
-    finally:
-        new_conn.close()
+
+        # KILL is processed asynchronously on the server, but we don't
+        # need to wait for it here. The test's correctness contract is
+        # "the killed (SPID, login_time) must never reappear in
+        # subsequent acquires." Any session that gets handed back
+        # later — whether the same SPID reused by the server or a
+        # transparently-reconnected one — necessarily has a different
+        # login_time, so the identity check below catches the only
+        # failure mode that matters.
+
+        # Step 3: return both to the pool.
+        victim.close()
+        admin.close()
+
+        # Step 4: re-acquire from the pool. Each must be working; the
+        # killed *physical session* (SPID, login_time) must never come
+        # back. SQL Server is free to reassign the SPID number to a new
+        # session, so SPID alone is not a reliable identity.
+        seen_ids = set()
+        for _ in range(4):
+            c = connect(conn_str)
+            try:
+                seen_ids.add(session_identity(c))
+                assert c.cursor().execute("SELECT 1").fetchone()[0] == 1, (
+                    "Pool handed out an unusable connection"
+                )
+            finally:
+                c.close()
+        assert victim_id not in seen_ids, (
+            f"Pool returned the killed session {victim_id}; "
+            f"saw sessions {seen_ids}"
+        )
+        """,
+        conn_str,
+    )
 
 
 def test_pool_recovery_after_failed_connection(conn_str):
     """Test that the pool recovers after a failed connection attempt."""
     pooling(max_size=1, idle_timeout=30)
-    # First, try to connect with a bad password (should fail)
-    if "Pwd=" in conn_str:
-        bad_conn_str = conn_str.replace("Pwd=", "Pwd=wrongpassword")
-    elif "Password=" in conn_str:
-        bad_conn_str = conn_str.replace("Password=", "Password=wrongpassword")
-    else:
+    # First, try to connect with a bad password (should fail).
+    # Match the password keyword case-insensitively since ODBC accepts any case.
+    bad_conn_str = re.sub(
+        r"(?i)(\b(?:pwd|password)\s*=)([^;]*)",
+        r"\1wrongpassword",
+        conn_str,
+        count=1,
+    )
+    if bad_conn_str == conn_str:
         pytest.skip("No password found in connection string to modify")
     with pytest.raises(Exception):
         connect(bad_conn_str)