Appsilon · jakubnowicki · Feb 27, 2026 · Feb 17, 2026 · Feb 27, 2026 · Feb 27, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -19,7 +19,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
 
       - name: Install uv
         uses: astral-sh/setup-uv@v4
@@ -46,7 +46,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
 
       - name: Install uv
         uses: astral-sh/setup-uv@v4
@@ -65,7 +65,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.10', '3.11', '3.12']
+        python-version: ['3.11', '3.12', '3.13']
 
     steps:
       - name: Checkout code
@@ -95,8 +95,8 @@ jobs:
 
       - name: Run tests
         run: |
-          if [ "${{ matrix.python-version }}" == "3.10" ]; then
-            # Run with coverage on Python 3.10
+          if [ "${{ matrix.python-version }}" == "3.11" ]; then
+            # Run with coverage on Python 3.11
             uv run python -m pytest tests/ --cov=tealflow_mcp --cov-report=xml --cov-report=term-missing -v
           else
             # Run without coverage on other versions

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,10 +4,10 @@ build-backend = "hatchling.build"
 
 [project]
 name = "tealflow-mcp"
-version = "0.2.0.dev1"
+version = "0.2.0.dev2"
 description = "MCP server for discovering, understanding, and generating Teal R Shiny applications for clinical trial data analysis"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.11"
 license = { text = "AGPL-3.0-only" }
 authors = [
     { name = "Jakub Nowicki", email = "[email protected]" },
@@ -35,17 +35,17 @@ classifiers = [
     "Intended Audience :: Science/Research",
     "License :: OSI Approved :: GNU Affero General Public License v3",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
     "Topic :: Software Development :: Libraries :: Python Modules",
     "Topic :: Scientific/Engineering",
 ]
 dependencies = [
     "mcp>=1.0.0",
     "pydantic>=2.0.0",
     "pandas>=2.0.0",
-    "pyreadr>=0.5.0",
+    "rdata>=1.0.0",
 ]
 
 [project.urls]
@@ -78,7 +78,7 @@ docs = [
 [tool.ruff]
 # Set the maximum line length to 100
 line-length = 100
-target-version = "py310"
+target-version = "py311"
 
 # Exclude common directories
 exclude = [
@@ -135,7 +135,7 @@ skip-magic-trailing-comma = false
 line-ending = "auto"
 
 [tool.mypy]
-python_version = "3.10"
+python_version = "3.11"
 warn_unused_configs = true
 disallow_untyped_defs = false
 disallow_incomplete_defs = false
@@ -152,7 +152,7 @@ strict_optional = false
 module = [
     "mcp.*",
     "fastmcp.*",
-    "pyreadr.*",
+    "rdata.*",
     "pandas.*",
 ]
 ignore_missing_imports = true
diff --git a/tealflow_mcp/__init__.py b/tealflow_mcp/__init__.py
@@ -13,7 +13,7 @@
 - Generating R code for Teal apps
 """
 
-__version__ = "0.2.0.dev1"
+__version__ = "0.2.0.dev2"
 
 from .core import PackageFilter, ResponseFormat
 from .models import (

diff --git a/tealflow_mcp/utils/dataset_readers.py b/tealflow_mcp/utils/dataset_readers.py
@@ -5,12 +5,77 @@
 row count, and file metadata.
 """
 
+import datetime
 import warnings
+from collections.abc import Mapping
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Any
 
+import numpy as np
 import pandas as pd
-import pyreadr
+import rdata
+from rdata.conversion import DEFAULT_CLASS_MAP, SimpleConverter
+
+
+def _date_constructor(obj: Any, attrs: Mapping[str, Any]) -> Any:
+    """
+    Build a pandas Series from the payload of an R ``Date`` vector.
+
+    R stores a Date as a day count relative to 1970-01-01; the result is datetime64[ns].
+    Missing entries (NaN, or masked when ``obj`` is a numpy masked array) become NaT.
+    ``attrs`` is accepted to match the constructor signature and is not read.
+    The Series is tagged with ``attrs["r_class"] = "Date"`` for later type inference.
+    """
+    days = np.ma.filled(np.ma.asarray(obj, dtype=float), np.nan)
+    valid = ~np.isnan(days)
+
+    # Start from an all-NaT column so missing positions never go through the conversion
+    result = pd.Series(pd.NaT, index=range(len(days)), dtype="datetime64[ns]")
+
+    # Convert only the valid day counts; out-of-range values are coerced to NaT
+    if valid.any():
+        origin = pd.Timestamp("1970-01-01")
+        result[valid] = pd.to_datetime(days[valid], unit="D", origin=origin, errors="coerce")
+
+    # Record the source R class on the Series so consumers can read it from attrs
+    # instead of guessing Date vs POSIXct from the time-of-day component
+    result.attrs["r_class"] = "Date"
+
+    return result
+
+
+def _posixct_constructor(obj: Any, attrs: Mapping[str, Any]) -> Any:
+    """
+    Build a pandas Series from the payload of an R ``POSIXct`` vector.
+
+    R stores POSIXct as seconds since 1970-01-01 UTC; the result is naive datetime64[ns].
+    Missing entries (NaN, or masked when ``obj`` is a numpy masked array) become NaT.
+    ``attrs`` (including any time zone attribute) is not read.
+    The Series is tagged with ``attrs["r_class"] = "POSIXct"`` for later type inference.
+    """
+    seconds = np.ma.filled(np.ma.asarray(obj, dtype=float), np.nan)
+    valid = ~np.isnan(seconds)
+
+    # Start from an all-NaT column so missing positions never go through the conversion
+    result = pd.Series(pd.NaT, index=range(len(seconds)), dtype="datetime64[ns]")
+
+    # Convert only the valid offsets; out-of-range values are coerced to NaT
+    if valid.any():
+        result[valid] = pd.to_datetime(seconds[valid], unit="s", errors="coerce")
+
+    # Record the source R class so a column whose timestamps all fall on midnight
+    # is still reported as POSIXct rather than guessed to be a Date
+    result.attrs["r_class"] = "POSIXct"
+
+    return result
+
+
+# Class map passed to SimpleConverter: rdata defaults plus Date/POSIXct constructors
+_RDATA_CLASS_MAP = DEFAULT_CLASS_MAP.copy()
+_RDATA_CLASS_MAP["Date"] = _date_constructor
+_RDATA_CLASS_MAP["POSIXct"] = _posixct_constructor
+_RDATA_CLASS_MAP["POSIXt"] = _posixct_constructor  # parent class; TODO confirm POSIXlt case
 
 
 @dataclass
@@ -45,7 +110,7 @@ def _infer_object_type(col_data: pd.Series) -> str:
     """
     Infer the R type for an object dtype column.
 
-    pyreadr converts numeric columns with NA values to object dtype.
+    R data readers may convert numeric columns with NA values to object dtype.
     This function checks if non-null values can be converted to numeric or are date objects.
 
     Args:
@@ -54,8 +119,6 @@ def _infer_object_type(col_data: pd.Series) -> str:
     Returns:
         "integer", "numeric", "date", or "character"
     """
-    import datetime
-
     # Get non-null values
     non_null = col_data.dropna()
 
@@ -91,9 +154,64 @@ def _infer_object_type(col_data: pd.Series) -> str:
         return "character"
 
 
+def _infer_datetime_type(col_data: pd.Series) -> str:
+    """
+    Classify a datetime column as an R Date ("date") or an R POSIXct ("POSIXct").
+
+    The R class recorded under ``col_data.attrs["r_class"]`` wins when it is "Date" or
+    "POSIXct", so a POSIXct column holding only midnight timestamps is not relabelled.
+
+    Otherwise (CSV input, hand-built Series, ...) the first 100 non-null values are
+    inspected: if none carries a time-of-day component the column is treated as a
+    Date. Such data may still be midnight-only POSIXct; that ambiguity is accepted.
+
+    Args:
+        col_data: pandas Series with datetime dtype
+
+    Returns:
+        "date" or "POSIXct" (also the answer for all-null columns and on any error)
+    """
+    # An explicit R class tag takes precedence over any guessing
+    recorded = getattr(col_data, "attrs", {}).get("r_class")
+    if recorded == "Date":
+        return "date"
+    if recorded == "POSIXct":
+        return "POSIXct"
+
+    # No usable tag: fall back to the time-of-day heuristic on the non-null values
+    present = col_data.dropna()
+    if len(present) == 0:
+        # Nothing to inspect, so keep the more general type
+        return "POSIXct"
+
+    # Only the leading values are examined
+    leading = present.head(min(100, len(present)))
+
+    # Components that must all be zero for a value to sit exactly on midnight
+    components = (
+        "hour",
+        "minute",
+        "second",
+        "microsecond",
+        "nanosecond",
+    )
+
+    try:
+        stamps = pd.to_datetime(leading)
+        # Checked lazily in the listed order; the first non-zero component stops the scan
+        has_time_of_day = not all(
+            (getattr(stamps.dt, part) == 0).all()
+            for part in components
+        )
+        return "POSIXct" if has_time_of_day else "date"
+    except Exception:
+        # A column that cannot be inspected is reported as POSIXct
+        return "POSIXct"
+
+
 def _read_rds_dataset(file_path: Path, include_sample_values: bool = False) -> DatasetInfo:
     """
-    Read dataset information from an RDS file using pyreadr.
+    Read dataset information from an RDS file using rdata.
 
     Args:
         file_path: Path to the RDS file
@@ -106,40 +224,45 @@ def _read_rds_dataset(file_path: Path, include_sample_values: bool = False) -> D
         ValueError: If the file cannot be read or is not a valid RDS file
     """
     try:
-        # Read RDS file using pyreadr
-        # Suppress RuntimeWarning from pyreadr's datetime conversion with NaT values
+        # Read RDS file using rdata with custom converters for Date/POSIXct
+        # Parse the file first, then convert with our custom class map
         with warnings.catch_warnings():
-            warnings.filterwarnings("ignore", message="invalid value encountered in cast")
-            result = pyreadr.read_r(str(file_path))
+            warnings.filterwarnings("ignore", category=UserWarning)
+            parsed = rdata.parser.parse_file(file_path)
+            converter = SimpleConverter(constructor_dict=_RDATA_CLASS_MAP)
+            df = converter.convert(parsed)
 
-        # pyreadr returns a dict, get the first (and usually only) dataframe
-        if not result:
+        # Verify we got a DataFrame
+        if df is None:
             raise ValueError("RDS file contains no data")
 
-        # Get the first dataframe
-        df = next(iter(result.values()))
+        if not isinstance(df, pd.DataFrame):
+            raise ValueError(f"RDS file does not contain a DataFrame, got {type(df).__name__}")
 
         # Extract column information
         columns = []
         for col_name in df.columns:
             col_data = df[col_name]
 
             # Get pandas dtype and convert to R-like type name
-            dtype = str(col_data.dtype)
+            dtype = str(col_data.dtype).lower()
 
             # Map pandas dtypes to R-like types
+            # Note: rdata uses nullable integer types (Int32, Int64) which become lowercase here
             if dtype.startswith("int"):
                 r_type = "integer"
             elif dtype.startswith("float"):
                 r_type = "numeric"
             elif dtype == "object":
                 # For object dtype, try to infer if it's actually numeric
-                # pyreadr converts numeric columns with NAs to object dtype
+                # R data readers may convert numeric columns with NAs to object dtype
                 r_type = _infer_object_type(col_data)
-            elif dtype == "bool":
+            elif dtype in ("bool", "boolean"):
                 r_type = "logical"
             elif dtype.startswith("datetime"):
-                r_type = "POSIXct"
+                # Distinguish between Date (date-only) and POSIXct (datetime)
+                # Date columns have no time component (all times are midnight)
+                r_type = _infer_datetime_type(col_data)
             else:
                 r_type = dtype