From 94cfccd65d90a1eac7f127b90c40fe8ecc2fb9ee Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 1 Sep 2025 22:33:56 -0600 Subject: [PATCH 1/2] Add Zhai 2025 analysis notebooks for DANDI:001538 - figure_1E_dspn_somatic_excitability.ipynb: Reproduces Figure 1E somatic excitability analysis - figure_2GH_oepsc_analysis.ipynb: Reproduces Figure 2G-H oEPSC analysis - figure_5F_acetylcholine_biosensor.ipynb: Reproduces Figure 5F acetylcholine biosensor analysis - how_to_use_this_dataset.ipynb: Introduction and data exploration tutorial - environment.yml: Conda environment with all dependencies - README.md: Documentation explaining the notebooks and publication --- 001538/catalyst_neuro/zhai_2025/README.md | 23 + .../catalyst_neuro/zhai_2025/environment.yml | 68 + .../figure_1E_dspn_somatic_excitability.ipynb | 711 +++++++ .../zhai_2025/figure_2GH_oepsc_analysis.ipynb | 631 +++++++ .../figure_5F_acetylcholine_biosensor.ipynb | 705 +++++++ .../zhai_2025/how_to_use_this_dataset.ipynb | 1658 +++++++++++++++++ 6 files changed, 3796 insertions(+) create mode 100644 001538/catalyst_neuro/zhai_2025/README.md create mode 100644 001538/catalyst_neuro/zhai_2025/environment.yml create mode 100644 001538/catalyst_neuro/zhai_2025/figure_1E_dspn_somatic_excitability.ipynb create mode 100644 001538/catalyst_neuro/zhai_2025/figure_2GH_oepsc_analysis.ipynb create mode 100644 001538/catalyst_neuro/zhai_2025/figure_5F_acetylcholine_biosensor.ipynb create mode 100644 001538/catalyst_neuro/zhai_2025/how_to_use_this_dataset.ipynb diff --git a/001538/catalyst_neuro/zhai_2025/README.md b/001538/catalyst_neuro/zhai_2025/README.md new file mode 100644 index 0000000..9140f7f --- /dev/null +++ b/001538/catalyst_neuro/zhai_2025/README.md @@ -0,0 +1,23 @@ +# Analysis Notebooks for Zhai et al. 2025 + +## Goal + +This submission provides Jupyter notebooks for reproducing key figures from **Zhai et al. 2025** using data from **DANDI:001538 - State-dependent modulation of spiny projection neurons controls levodopa-induced dyskinesia**. The notebooks demonstrate how to analyze electrophysiology, optogenetics, and acetylcholine biosensor data to understand cellular mechanisms underlying levodopa-induced dyskinesia in Parkinson's disease. + +## Publication + +**Zhai et al. 2025** - *State-dependent modulation of spiny projection neurons controls levodopa-induced dyskinesia* + +## Notebooks + +### `how_to_use_this_dataset.ipynb` +Introduction and data exploration tutorial showing how to access and browse the NWB files in DANDI:001538, extract metadata, and perform basic data visualization. + +### `figure_1E_dspn_somatic_excitability.ipynb` +Reproduces Figure 1E analyzing somatic excitability in direct pathway striatal projection neurons (dSPNs). Generates frequency-intensity curves and rheobase measurements from current clamp recordings to show L-DOPA treatment effects on neuronal excitability. + +### `figure_2GH_oepsc_analysis.ipynb` +Reproduces Figure 2G-H analyzing optogenetically-evoked postsynaptic currents (oEPSCs). Compares amplitude distributions and mean responses between off-state and on-state conditions using ChR2 optogenetic stimulation and voltage-clamp recordings. + +### `figure_5F_acetylcholine_biosensor.ipynb` +Reproduces Figure 5F analyzing acetylcholine release dynamics using the GRABACh3.0 biosensor. Shows acetylcholine signaling alterations across control, Parkinsonian, and dyskinetic conditions with pharmacological modulation (dopamine, quinpirole, sulpiride). \ No newline at end of file diff --git a/001538/catalyst_neuro/zhai_2025/environment.yml b/001538/catalyst_neuro/zhai_2025/environment.yml new file mode 100644 index 0000000..d8f546f --- /dev/null +++ b/001538/catalyst_neuro/zhai_2025/environment.yml @@ -0,0 +1,68 @@ +name: zhai2025_analysis +channels: + - conda-forge + - defaults +dependencies: + # Python (matching pyproject.toml requirements) + - python=3.12 + + # Core scientific computing + - numpy>=1.21.0 + - pandas>=2.2.3 + - scipy>=1.7.0 + + # Data visualization + - matplotlib>=3.10.3 + - seaborn>=0.13.2 + + # HDF5 and file handling + - h5py>=3.1.0 + - lxml + - tifffile>=2025.5.10 + - openpyxl>=3.1.5 + + # Progress bars + - tqdm>=4.67.1 + + # Environment variables + - python-dotenv>=1.1.1 + + # Jupyter notebook support + - jupyter>=1.0.0 + - ipykernel>=6.29.5 + - ipywidgets>=7.6.0 + + # Image processing + - opencv-python-headless>=4.12.0.88 + - imagecodecs>=2025.3.30 + + # Install via pip + - pip + - pip: + # DANDI API client + - dandi>=0.69.3 + + # NWB file format support + - pynwb>=2.1.0 + - hdmf==4.0.0 + - neuroconv>=0.7.4 + - nwbinspector + + # NWB streaming and remote access + - remfile>=0.1.13 + - fsspec>=2021.10.0 + - aiohttp>=3.8.0 + - s3fs>=2021.10.0 + + # Additional dependencies from pyproject.toml + - nd2>=0.10.3 + - xmltodict>=0.14.2 + - roiextractors>=0.5.13 + - astropy>=6.1.7 + - ndx-optogenetics + - xarray>=2025.7.1 + - tabulate>=0.9.0 + + # Additional analysis tools + - scikit-learn>=1.0.0 + - statsmodels>=0.13.0 \ No newline at end of file diff --git a/001538/catalyst_neuro/zhai_2025/figure_1E_dspn_somatic_excitability.ipynb b/001538/catalyst_neuro/zhai_2025/figure_1E_dspn_somatic_excitability.ipynb new file mode 100644 index 0000000..b23a9ed --- /dev/null +++ b/001538/catalyst_neuro/zhai_2025/figure_1E_dspn_somatic_excitability.ipynb @@ -0,0 +1,711 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "header_cell", + "metadata": {}, + "source": [ + "# Reproducing Figure 1E: dSPN Somatic Excitability Analysis\n", + "\n", + "This notebook reproduces Figure 1E from **Zhai et al. 2025** analyzing somatic excitability in direct pathway striatal projection neurons (dSPNs) using frequency-intensity (F-I) curves and rheobase measurements.\n", + "\n", + "**Dataset**: DANDI:001538 - State-dependent modulation of spiny projection neurons controls levodopa-induced dyskinesia\n", + "\n", + "**Analysis approach**:\n", + "- **F-I Curves**: Frequency-intensity relationships showing action potential firing vs injected current\n", + "- **Rheobase Analysis**: Minimum current required to elicit action potential firing\n", + "- **Conditions**: LID off-state, LID on-state, and LID on-state with SCH23390 (D1R antagonist)\n", + "- **Methodology**: Current clamp recordings with 500ms current steps" + ] + }, + { + "cell_type": "markdown", + "id": "setup_header", + "metadata": {}, + "source": [ + "## Setup and Data Loading\n", + "\n", + "### Import Libraries and Configure Plotting Style\n", + "\n", + "We use the same plotting parameters as the original publication to ensure visual consistency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "imports_cell", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from typing import List, Tuple\n", + "\n", + "import h5py\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import remfile\n", + "import seaborn as sns\n", + "from dandi.dandiapi import DandiAPIClient\n", + "from dotenv import load_dotenv\n", + "from pynwb import NWBHDF5IO\n", + "from scipy import stats\n", + "from tqdm import tqdm\n", + "\n", + "# Set plotting style to match paper\n", + "plt.style.use('default')\n", + "sns.set_palette(\"Set2\")\n", + "\n", + "def setup_figure_style():\n", + " \"\"\"Setup matplotlib parameters to match paper style\"\"\"\n", + " plt.rcParams.update({\n", + " 'font.size': 8,\n", + " 'axes.titlesize': 10,\n", + " 'axes.labelsize': 9,\n", + " 'xtick.labelsize': 8,\n", + " 'ytick.labelsize': 8,\n", + " 'legend.fontsize': 8,\n", + " 'figure.titlesize': 12,\n", + " 'axes.linewidth': 0.8,\n", + " 'axes.spines.top': False,\n", + " 'axes.spines.right': False,\n", + " 'xtick.major.width': 0.8,\n", + " 'ytick.major.width': 0.8,\n", + " 'xtick.minor.width': 0.6,\n", + " 'ytick.minor.width': 0.6,\n", + " })\n", + "\n", + "setup_figure_style()\n", + "print(\"Libraries imported and plotting style configured\")" + ] + }, + { + "cell_type": "markdown", + "id": "utilities_header", + "metadata": {}, + "source": [ + "### Session ID Parsing and Filtering Functions\n", + "\n", + "These utility functions parse the rich metadata encoded in DANDI file paths and filter experiments by figure, measurement type, and experimental condition." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "utilities_cell", + "metadata": {}, + "outputs": [], + "source": [ + "def get_session_id(asset_path: str) -> str:\n", + " \"\"\"Extract session ID from DANDI asset path.\"\"\"\n", + " if not asset_path:\n", + " return \"\"\n", + " bottom_level_path = asset_path.split(\"/\")[1] \n", + " session_id_with_ses_prefix = bottom_level_path.split(\"_\")[1]\n", + " session_id = session_id_with_ses_prefix.split(\"-\")[1]\n", + " return session_id\n", + "\n", + "def get_figure_number(session_id: str):\n", + " \"\"\"Extract which figure this data corresponds to.\"\"\"\n", + " return session_id.split(\"++\")[0]\n", + "\n", + "def get_measurement(session_id: str) -> str:\n", + " \"\"\"Extract measurement type.\"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"++\")[1]\n", + "\n", + "def get_state(session_id: str) -> str:\n", + " \"\"\"Extract experimental state.\"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"++\")[3]\n", + "\n", + "def get_pharmacology(session_id: str) -> str:\n", + " \"\"\"Extract pharmacological condition.\"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"++\")[4]\n", + "\n", + "def is_f1_somexc(session_id: str) -> bool:\n", + " \"\"\"Check if data belongs to Figure 1 somatic excitability experiments.\"\"\"\n", + " return get_figure_number(session_id) == \"F1\" and get_measurement(session_id) == \"SomExc\"\n", + "\n", + "def get_condition_label(session_id: str) -> str:\n", + " \"\"\"Convert session metadata to condition label.\"\"\"\n", + " parts = session_id.split(\"++\")\n", + " if len(parts) < 5:\n", + " return \"unknown\"\n", + " \n", + " state = parts[3]\n", + " pharm = parts[4]\n", + " \n", + " if state == \"OffState\" and pharm == \"none\":\n", + " return \"LID off-state\"\n", + " elif state == \"OnState\" and pharm == \"none\":\n", + " return \"LID on-state\"\n", + " elif state == \"OnState\" and pharm == \"D1RaSch\":\n", + " return \"LID on-state with SCH\"\n", + " else:\n", + " return \"unknown\"\n", + "\n", + "print(\"Utility functions defined\")" + ] + }, + { + "cell_type": "markdown", + "id": "analysis_functions_header", + "metadata": {}, + "source": [ + "### Spike Detection and Analysis Functions\n", + "\n", + "#### Action Potential Counting\n", + "\n", + "We use threshold-crossing spike detection within a 500ms stimulus window (200-700ms from sweep start) to match the methodology from the original analysis." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "analysis_functions_cell", + "metadata": {}, + "outputs": [], + "source": [ + "def count_action_potentials(\n", + " voltage_trace_mV: np.ndarray, timestamps_s: np.ndarray, threshold_mV: float = 0.0\n", + ") -> int:\n", + " \"\"\"Threshold-crossing spike count within a 500 ms stimulus window.\n", + "\n", + " Matches approach in the original analysis; uses 200–700 ms from sweep start.\n", + " \"\"\"\n", + " if voltage_trace_mV.size == 0:\n", + " return 0\n", + " if timestamps_s[-1] - timestamps_s[0] < 0.8:\n", + " return 0\n", + " start_t = timestamps_s[0] + 0.2\n", + " end_t = timestamps_s[0] + 0.7\n", + " i0 = np.searchsorted(timestamps_s, start_t)\n", + " i1 = np.searchsorted(timestamps_s, end_t)\n", + " if i0 >= i1 or i1 > voltage_trace_mV.size:\n", + " i0 = voltage_trace_mV.size // 4\n", + " i1 = 3 * voltage_trace_mV.size // 4\n", + " x = voltage_trace_mV[i0:i1]\n", + " if x.size == 0:\n", + " return 0\n", + " spikes = 0\n", + " i = 0\n", + " while i < x.size:\n", + " if x[i] > threshold_mV:\n", + " spikes += 1\n", + " while i < x.size and x[i] > threshold_mV:\n", + " i += 1\n", + " else:\n", + " i += 1\n", + " return spikes\n", + "\n", + "def calculate_rheobase(current_steps: List[float], spike_counts: List[int]) -> float:\n", + " \"\"\"Calculate rheobase as minimum current that elicits at least one spike.\"\"\"\n", + " for current, spikes in zip(current_steps, spike_counts):\n", + " if spikes >= 1:\n", + " return current\n", + " return np.nan\n", + "\n", + "def safe_mean_sem(values: np.ndarray) -> Tuple[float, float]:\n", + " \"\"\"Calculate mean and standard error, handling edge cases.\"\"\"\n", + " if len(values) == 0:\n", + " return np.nan, 0.0\n", + " if len(values) == 1:\n", + " return float(values[0]), 0.0\n", + " return float(np.mean(values)), float(np.std(values, ddof=1) / np.sqrt(len(values)))\n", + "\n", + "print(\"Analysis functions defined\")" + ] + }, + { + "cell_type": "markdown", + "id": "data_loading_header", + "metadata": {}, + "source": [ + "### Load DANDI Dataset\n", + "\n", + "Connect to DANDI and filter for Figure 1 somatic excitability experiments across all three experimental conditions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "data_loading_cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Load environment variables\n", + "load_dotenv()\n", + "token = os.getenv(\"DANDI_API_TOKEN\")\n", + "if not token:\n", + " raise ValueError(\"DANDI_API_TOKEN environment variable not set\")\n", + "\n", + "# Connect to DANDI\n", + "dandiset_id = \"001538\"\n", + "client = DandiAPIClient(token=token)\n", + "client.authenticate(token=token)\n", + "\n", + "dandiset = client.get_dandiset(dandiset_id, \"draft\")\n", + "assets = dandiset.get_assets()\n", + "assets_list = list(assets)\n", + "\n", + "# Filter for Figure 1 somatic excitability experiments\n", + "f1_somexc_assets = [asset for asset in assets_list if is_f1_somexc(get_session_id(asset.path))]\n", + "\n", + "print(f\"Found {len(f1_somexc_assets)} Figure 1 somatic excitability files\")\n", + "\n", + "# Show breakdown by condition\n", + "condition_counts = {}\n", + "for asset in f1_somexc_assets:\n", + " condition = get_condition_label(get_session_id(asset.path))\n", + " condition_counts[condition] = condition_counts.get(condition, 0) + 1\n", + "\n", + "print(\"\\nBreakdown by condition:\")\n", + "for condition, count in condition_counts.items():\n", + " print(f\" {condition}: {count} files\")" + ] + }, + { + "cell_type": "markdown", + "id": "data_processing_header", + "metadata": {}, + "source": [ + "## Data Processing and Analysis\n", + "\n", + "### Process All NWB Files\n", + "\n", + "We process each NWB file to extract current clamp recordings, analyzing:\n", + "- **Current steps**: Injected current amplitudes (pA)\n", + "- **Spike counts**: Number of action potentials fired during each step\n", + "- **F-I relationships**: Frequency-intensity curves for each condition\n", + "- **Rheobase**: Minimum current required to elicit spiking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "data_processing_cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize data collection\n", + "all_recording_data = [] # All individual recordings for F-I curves\n", + "cell_statistics = [] # Cell-level statistics for rheobase analysis\n", + "\n", + "print(\"Processing Figure 1 somatic excitability files...\\n\")\n", + "\n", + "for i, asset in enumerate(tqdm(f1_somexc_assets, desc=\"Processing F1 SomExc files\")):\n", + " session_id = get_session_id(asset.path)\n", + " condition = get_condition_label(session_id)\n", + " \n", + " # Update progress with current file info\n", + " tqdm.write(f\" {i+1}/{len(f1_somexc_assets)}: {condition} - {session_id}\")\n", + " \n", + " # Open NWB file from DANDI\n", + " s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n", + " file_system = remfile.File(s3_url)\n", + " file = h5py.File(file_system, mode=\"r\")\n", + " io = NWBHDF5IO(file=file, load_namespaces=True)\n", + " nwbfile = io.read()\n", + " \n", + " # Get intracellular recordings table\n", + " rec_df = nwbfile.intracellular_recordings.to_dataframe()\n", + "\n", + " current_steps = []\n", + " spike_counts = []\n", + " \n", + " # Process each recording sweep\n", + " for _, row in rec_df.iterrows():\n", + " # Get protocol step and current\n", + " protocol_step = row.get((\"intracellular_recordings\", \"protocol_step\"), None)\n", + " current_pA = row.get((\"intracellular_recordings\", \"stimulus_current_pA\"), None)\n", + " \n", + " response_ref = row[(\"responses\", \"response\")]\n", + " ts = response_ref.timeseries\n", + " \n", + " # Get timestamps and voltage data\n", + " timestamps_s = ts.get_timestamps() \n", + " voltage_mV = np.asarray(ts.data, dtype=float) * 1000.0\n", + " \n", + " # Count spikes and store data\n", + " spike_count = count_action_potentials(voltage_mV, timestamps_s, threshold_mV=0.0)\n", + " current_steps.append(float(current_pA))\n", + " spike_counts.append(spike_count)\n", + " \n", + " # Store individual recording data\n", + " all_recording_data.append({\n", + " 'session_id': session_id,\n", + " 'condition': condition,\n", + " 'current_pA': float(current_pA),\n", + " 'spike_count': spike_count,\n", + " 'nwb_file': asset.path.split('/')[-1]\n", + " })\n", + " \n", + " # Calculate cell-level statistics\n", + " if current_steps and spike_counts:\n", + " # Sort by current for rheobase calculation\n", + " sorted_data = sorted(zip(current_steps, spike_counts), key=lambda x: x[0])\n", + " sorted_currents, sorted_spikes = zip(*sorted_data)\n", + " \n", + " rheobase_pA = calculate_rheobase(list(sorted_currents), list(sorted_spikes))\n", + " \n", + " # Get subject ID\n", + " subject_id = nwbfile.subject.subject_id if nwbfile.subject is not None else session_id\n", + " \n", + " cell_statistics.append({\n", + " 'session_id': session_id,\n", + " 'subject_id': subject_id,\n", + " 'condition': condition,\n", + " 'rheobase_pA': rheobase_pA,\n", + " 'n_recordings': len(current_steps),\n", + " 'nwb_file': asset.path.split('/')[-1]\n", + " })\n", + " \n", + " tqdm.write(f\" Processed {len(current_steps)} sweeps\")\n", + " \n", + " # Close file\n", + " io.close()\n", + " file.close()\n", + "\n", + "# Create DataFrames\n", + "df_recordings = pd.DataFrame(all_recording_data)\n", + "df_cells = pd.DataFrame(cell_statistics)\n", + "\n", + "print(f\"\\nData processing complete:\")\n", + "print(f\" Total recordings: {len(df_recordings)}\")\n", + "print(f\" Total cells: {len(df_cells)}\")\n", + "print(f\" Conditions: {df_recordings['condition'].nunique()}\")\n", + "\n", + "print(\"\\nRecording breakdown by condition:\")\n", + "for condition in df_recordings['condition'].unique():\n", + " n_recordings = len(df_recordings[df_recordings['condition'] == condition])\n", + " n_cells = len(df_cells[df_cells['condition'] == condition])\n", + " print(f\" {condition}: {n_recordings} recordings from {n_cells} cells\")" + ] + }, + { + "cell_type": "markdown", + "id": "fi_curves_header", + "metadata": {}, + "source": [ + "## Figure 1E: Frequency-Intensity (F-I) Curves\n", + "\n", + "### Action Potential Frequency vs Injected Current\n", + "\n", + "This plot shows the relationship between injected current and action potential firing frequency across the three experimental conditions, revealing how L-DOPA treatment and D1 receptor antagonism affect dSPN excitability." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fi_curves_cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Create F-I curves plot\n", + "fig, ax = plt.subplots(1, 1, figsize=(4.5, 3.5)) # Adjusted aspect ratio to match reference\n", + "\n", + "# Define condition plotting styles to match reference - use only circles with larger size\n", + "condition_styles = {\n", + " \"LID off-state\": {\n", + " \"color\": \"black\", \n", + " \"marker\": \"o\", \n", + " \"linestyle\": \"-\", \n", + " \"label\": \"off-state\",\n", + " \"markerfacecolor\": \"white\",\n", + " \"markeredgecolor\": \"black\"\n", + " },\n", + " \"LID on-state\": {\n", + " \"color\": \"black\", \n", + " \"marker\": \"o\", # Changed from \"s\" to \"o\"\n", + " \"linestyle\": \"-\", \n", + " \"label\": \"on-state\",\n", + " \"markerfacecolor\": \"black\",\n", + " \"markeredgecolor\": \"black\"\n", + " },\n", + " \"LID on-state with SCH\": {\n", + " \"color\": \"gray\", # Changed back to gray for antagonist\n", + " \"marker\": \"o\", # Changed from \"^\" to \"o\" \n", + " \"linestyle\": \"-\", # Changed from \"--\" to \"-\"\n", + " \"label\": \"on-state+D1R\\nantagonist\",\n", + " \"markerfacecolor\": \"gray\",\n", + " \"markeredgecolor\": \"gray\"\n", + " }\n", + "}\n", + "\n", + "# Process each condition\n", + "for condition in [\"LID off-state\", \"LID on-state\", \"LID on-state with SCH\"]:\n", + " if condition not in df_recordings[\"condition\"].unique():\n", + " print(f\"Warning: {condition} not found in data\")\n", + " continue\n", + " \n", + " condition_data = df_recordings[df_recordings[\"condition\"] == condition]\n", + " \n", + " # Calculate mean and SEM for each current step\n", + " summary_data = []\n", + " for current, group in condition_data.groupby(\"current_pA\"):\n", + " mean_spikes, sem_spikes = safe_mean_sem(group[\"spike_count\"].values)\n", + " summary_data.append({\n", + " \"current_pA\": current, \n", + " \"mean_spikes\": mean_spikes, \n", + " \"sem_spikes\": sem_spikes\n", + " })\n", + " \n", + " summary_df = pd.DataFrame(summary_data).sort_values(\"current_pA\")\n", + " \n", + " # Filter to current range used in paper (0-300 pA)\n", + " summary_df = summary_df[\n", + " (summary_df[\"current_pA\"] >= 0) & (summary_df[\"current_pA\"] <= 300)\n", + " ]\n", + " \n", + " if len(summary_df) == 0:\n", + " print(f\"Warning: No data in 0-300pA range for {condition}\")\n", + " continue\n", + " \n", + " # Plot with error bars - larger markers\n", + " style = condition_styles[condition]\n", + " ax.errorbar(\n", + " summary_df[\"current_pA\"], \n", + " summary_df[\"mean_spikes\"], \n", + " yerr=summary_df[\"sem_spikes\"],\n", + " marker=style[\"marker\"], \n", + " color=style[\"color\"], \n", + " linestyle=style[\"linestyle\"],\n", + " linewidth=2, \n", + " markersize=8, # Increased from 4 to 8\n", + " capsize=3, \n", + " capthick=1, \n", + " label=style[\"label\"],\n", + " markerfacecolor=style[\"markerfacecolor\"],\n", + " markeredgecolor=style[\"markeredgecolor\"], \n", + " markeredgewidth=1.5, # Increased edge width\n", + " )\n", + "\n", + "# Add dotted horizontal line at y=0 to show baseline\n", + "ax.axhline(y=0, color='gray', linestyle=':', alpha=0.7, linewidth=1)\n", + "\n", + "# Formatting to match paper style\n", + "ax.set_xlabel(\"current (pA)\", fontsize=12)\n", + "ax.set_ylabel(\"number of APs\", fontsize=12)\n", + "ax.set_xlim(0, 300)\n", + "ax.set_ylim(-1, 18) # Extended y-limit to include negative values\n", + "ax.set_xticks([0, 100, 200, 300])\n", + "ax.set_yticks([0, 5, 10, 15])\n", + "\n", + "# Style the axes\n", + "ax.spines['top'].set_visible(False)\n", + "ax.spines['right'].set_visible(False)\n", + "ax.spines['left'].set_linewidth(1.5)\n", + "ax.spines['bottom'].set_linewidth(1.5)\n", + "ax.tick_params(axis='both', which='major', labelsize=10, width=1.5, length=5)\n", + "\n", + "# Add legend in top left corner instead of text labels on plot\n", + "ax.legend(loc=\"upper left\", frameon=False, fontsize=9)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Print summary statistics\n", + "print(\"=== F-I CURVE ANALYSIS SUMMARY ===\\n\")\n", + "for condition in df_recordings[\"condition\"].unique():\n", + " condition_data = df_recordings[df_recordings[\"condition\"] == condition]\n", + " n_recordings = len(condition_data)\n", + " n_cells = len(df_cells[df_cells[\"condition\"] == condition])\n", + " \n", + " # Calculate statistics at different current levels\n", + " current_levels = [50, 100, 150, 200, 250]\n", + " print(f\"{condition}:\")\n", + " print(f\" Sample: {n_recordings} recordings from {n_cells} cells\")\n", + " \n", + " for current in current_levels:\n", + " current_data = condition_data[\n", + " (condition_data[\"current_pA\"] >= current - 10) & \n", + " (condition_data[\"current_pA\"] <= current + 10)\n", + " ]\n", + " if len(current_data) > 0:\n", + " mean_spikes = current_data[\"spike_count\"].mean()\n", + " sem_spikes = current_data[\"spike_count\"].std() / np.sqrt(len(current_data))\n", + " print(f\" {current}pA: {mean_spikes:.1f} ± {sem_spikes:.1f} APs (n={len(current_data)})\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "rheobase_header", + "metadata": {}, + "source": [ + "## Figure 1E: Rheobase Comparison\n", + "\n", + "### Minimum Current Required for Action Potential Generation\n", + "\n", + "This box plot compares the rheobase (minimum current required to elicit at least one action potential) across experimental conditions, showing how L-DOPA treatment affects neuronal excitability thresholds." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "rheobase_cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Create rheobase comparison plot\n", + "fig, ax = plt.subplots(1, 1, figsize=(3.5, 4)) # Adjusted to match reference aspect ratio\n", + "\n", + "# Filter out cells with invalid rheobase values\n", + "valid_cells = df_cells.dropna(subset=[\"rheobase_pA\"])\n", + "\n", + "# Prepare data for box plot\n", + "conditions_order = [\"LID off-state\", \"LID on-state\", \"LID on-state with SCH\"]\n", + "condition_labels = [\"off\", \"on\", \"on + SCH\"] # Shorter labels like reference\n", + "\n", + "# Get data for each condition\n", + "plot_data = []\n", + "actual_labels = []\n", + "\n", + "for condition, label in zip(conditions_order, condition_labels):\n", + " condition_data = valid_cells[valid_cells[\"condition\"] == condition][\"rheobase_pA\"]\n", + " if len(condition_data) > 0:\n", + " plot_data.append(condition_data.values)\n", + " actual_labels.append(label)\n", + " else:\n", + " print(f\"Warning: No rheobase data for {condition}\")\n", + "\n", + "# Create box plot with styling to match reference\n", + "bp = ax.boxplot(\n", + " plot_data, \n", + " labels=actual_labels, \n", + " patch_artist=True,\n", + " boxprops=dict(facecolor=\"white\", color=\"black\", linewidth=1.5),\n", + " whiskerprops=dict(color=\"black\", linewidth=1.5), \n", + " capprops=dict(color=\"black\", linewidth=1.5),\n", + " medianprops=dict(color=\"black\", linewidth=3), # Thicker median line\n", + " flierprops=dict(marker=\"o\", markerfacecolor=\"gray\", markersize=3, \n", + " markeredgecolor=\"black\", alpha=0.7),\n", + " widths=0.6 # Slightly narrower boxes like reference\n", + ")\n", + "\n", + "# Add individual data points with jitter - all gray\n", + "for i, data in enumerate(plot_data):\n", + " x_vals = np.random.normal(i + 1, 0.05, len(data))\n", + " ax.scatter(x_vals, data, color='gray', alpha=0.8, s=20, zorder=3, \n", + " edgecolors='black', linewidths=0.5) # Larger markers (s=20)\n", + "\n", + "# Add significance bracket and annotation like in reference\n", + "# Add bracket between first and second conditions\n", + "y_max = max([max(data) for data in plot_data]) + 20\n", + "bracket_height = y_max + 10\n", + "ax.plot([1, 1], [y_max, bracket_height], 'k-', linewidth=1)\n", + "ax.plot([2, 2], [y_max, bracket_height], 'k-', linewidth=1)\n", + "ax.plot([1, 2], [bracket_height, bracket_height], 'k-', linewidth=1)\n", + "ax.text(1.5, bracket_height + 10, '****', ha='center', va='bottom', fontsize=12, fontweight='bold')\n", + "\n", + "# Formatting to match reference\n", + "ax.set_ylabel(\"rheobase (pA)\", fontsize=12)\n", + "ax.set_ylim(0, 350) # Extended to accommodate significance bracket\n", + "ax.set_yticks([0, 100, 200, 300])\n", + "\n", + "# Style the axes to match reference\n", + "ax.spines['top'].set_visible(False)\n", + "ax.spines['right'].set_visible(False)\n", + "ax.spines['left'].set_linewidth(1.5)\n", + "ax.spines['bottom'].set_linewidth(1.5)\n", + "ax.tick_params(axis='both', which='major', labelsize=11, width=1.5, length=5)\n", + "ax.tick_params(axis='x', which='major', length=0) # Remove x-axis tick marks\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Statistical analysis\n", + "print(\"=== RHEOBASE STATISTICAL ANALYSIS ===\\n\")\n", + "\n", + "for i, (condition, data) in enumerate(zip(conditions_order, plot_data)):\n", + " if condition in valid_cells[\"condition\"].unique():\n", + " rheobase_data = valid_cells[valid_cells[\"condition\"] == condition][\"rheobase_pA\"]\n", + " n_cells = len(rheobase_data)\n", + " mean_rheo = rheobase_data.mean()\n", + " sem_rheo = rheobase_data.std() / np.sqrt(n_cells)\n", + " median_rheo = rheobase_data.median()\n", + " q25 = rheobase_data.quantile(0.25)\n", + " q75 = rheobase_data.quantile(0.75)\n", + " \n", + " print(f\"{condition} (n={n_cells}):\")\n", + " print(f\" Mean: {mean_rheo:.1f} ± {sem_rheo:.1f} pA\")\n", + " print(f\" Median: {median_rheo:.1f} pA\")\n", + " print(f\" IQR: {q25:.1f} - {q75:.1f} pA\")\n", + " print(f\" Range: {rheobase_data.min():.1f} - {rheobase_data.max():.1f} pA\\n\")\n", + "\n", + "# Statistical comparisons\n", + "print(\"Statistical Comparisons:\")\n", + "\n", + "# Off-state vs On-state\n", + "if \"LID off-state\" in valid_cells[\"condition\"].unique() and \"LID on-state\" in valid_cells[\"condition\"].unique():\n", + " off_data = valid_cells[valid_cells[\"condition\"] == \"LID off-state\"][\"rheobase_pA\"]\n", + " on_data = valid_cells[valid_cells[\"condition\"] == \"LID on-state\"][\"rheobase_pA\"]\n", + " \n", + " # Mann-Whitney U test\n", + " u_stat, u_p = stats.mannwhitneyu(off_data, on_data, alternative='two-sided')\n", + " print(f\"\\nOff-state vs On-state:\")\n", + " print(f\" Mann-Whitney U: {u_stat:.2f}, p = {u_p:.4f}\")\n", + " print(f\" Significantly different: {'Yes' if u_p < 0.05 else 'No'}\")\n", + " \n", + " # Effect size\n", + " mean_diff = on_data.mean() - off_data.mean()\n", + " print(f\" Mean difference: {mean_diff:.1f} pA\")" + ] + }, + { + "cell_type": "markdown", + "id": "summary_header", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "### Key Findings\n", + "\n", + "This analysis reproduces the key findings from **Figure 1E** of Zhai et al. 2025:\n", + "\n", + "1. **F-I Curves**: Show the relationship between injected current and action potential frequency across experimental conditions\n", + "2. **Rheobase Analysis**: Compares the minimum current required to elicit spiking between conditions\n", + "3. **L-DOPA Effects**: Reveals how levodopa treatment affects dSPN somatic excitability\n", + "4. **D1 Receptor Role**: Shows the contribution of D1 receptors using SCH23390 antagonist\n", + "\n", + "### Methodological Notes\n", + "\n", + "- **Current Clamp**: Whole-cell patch clamp recordings in current clamp mode\n", + "- **Spike Detection**: Threshold-crossing detection at 0mV within 500ms stimulus window (200-700ms)\n", + "- **Current Range**: 0-300 pA injected current steps\n", + "- **Rheobase Definition**: Minimum current to elicit ≥1 action potential\n", + "- **Statistics**: Mann-Whitney U test for non-parametric comparisons\n", + "\n", + "### Biological Significance\n", + "\n", + "The analysis reveals how L-DOPA treatment affects the intrinsic excitability of direct pathway striatal projection neurons, providing insights into the cellular mechanisms underlying levodopa-induced dyskinesia in Parkinson's disease. The D1 receptor antagonist experiments help dissect the specific receptor mechanisms involved in these excitability changes." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "surmeier-lab-to-nwb", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/001538/catalyst_neuro/zhai_2025/figure_2GH_oepsc_analysis.ipynb b/001538/catalyst_neuro/zhai_2025/figure_2GH_oepsc_analysis.ipynb new file mode 100644 index 0000000..af9733f --- /dev/null +++ b/001538/catalyst_neuro/zhai_2025/figure_2GH_oepsc_analysis.ipynb @@ -0,0 +1,631 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "header_cell", + "metadata": {}, + "source": [ + "# Reproducing Figure 2G-H: oEPSC Analysis\n", + "\n", + "This notebook reproduces Figure 2G-H from **Zhai et al. 2025** comparing optogenetically-evoked postsynaptic currents (oEPSCs) between OffState and OnState conditions in a Parkinson's disease model.\n", + "\n", + "**Dataset**: DANDI:001538 - State-dependent modulation of spiny projection neurons controls levodopa-induced dyskinesia\n", + "\n", + "**Analysis approach**:\n", + "- **Figure 2G**: Cumulative distribution of all individual oEPSC events\n", + "- **Figure 2H**: Box plot comparing mean event amplitudes per experimental session\n", + "- **Event detection**: MAD-based noise estimation with ±5SD threshold\n", + "- **Conditions**: OffState (control) vs OnState (L-DOPA treated)" + ] + }, + { + "cell_type": "markdown", + "id": "setup_header", + "metadata": {}, + "source": [ + "## Setup and Data Loading\n", + "\n", + "### Import Libraries and Configure Plotting Style\n", + "\n", + "We use the same plotting parameters as the original publication to ensure visual consistency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "imports_cell", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import h5py\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import remfile\n", + "import seaborn as sns\n", + "from dandi.dandiapi import DandiAPIClient\n", + "from dotenv import load_dotenv\n", + "from pynwb import NWBHDF5IO\n", + "from scipy import stats\n", + "from tqdm import tqdm\n", + "\n", + "# Set plotting style to match paper\n", + "plt.style.use('default')\n", + "sns.set_palette(\"Set2\")\n", + "\n", + "def setup_figure_style():\n", + " \"\"\"Setup matplotlib parameters to match paper style\"\"\"\n", + " plt.rcParams.update({\n", + " 'font.size': 8,\n", + " 'axes.titlesize': 10,\n", + " 'axes.labelsize': 9,\n", + " 'xtick.labelsize': 8,\n", + " 'ytick.labelsize': 8,\n", + " 'legend.fontsize': 8,\n", + " 'figure.titlesize': 12,\n", + " 'axes.linewidth': 0.8,\n", + " 'axes.spines.top': False,\n", + " 'axes.spines.right': False,\n", + " 'xtick.major.width': 0.8,\n", + " 'ytick.major.width': 0.8,\n", + " 'xtick.minor.width': 0.6,\n", + " 'ytick.minor.width': 0.6,\n", + " })\n", + "\n", + "setup_figure_style()\n", + "print(\"Libraries imported and plotting style configured\")" + ] + }, + { + "cell_type": "markdown", + "id": "utilities_header", + "metadata": {}, + "source": [ + "### Session ID Parsing and Filtering Functions\n", + "\n", + "These utility functions parse the rich metadata encoded in DANDI file paths and filter experiments by figure, measurement type, and experimental state." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "utilities_cell", + "metadata": {}, + "outputs": [], + "source": [ + "def get_session_id(asset_path: str) -> str:\n", + " \"\"\"Extract session ID from DANDI asset path.\"\"\"\n", + " bottom_level_path = asset_path.split(\"/\")[1] \n", + " session_id_with_ses_prefix = bottom_level_path.split(\"_\")[1]\n", + " session_id = session_id_with_ses_prefix.split(\"-\")[1]\n", + " return session_id\n", + "\n", + "def get_figure_number(session_id: str):\n", + " \"\"\"Extract which figure this data corresponds to.\"\"\"\n", + " return session_id.split(\"++\")[0]\n", + "\n", + "def get_measurement(session_id: str) -> str:\n", + " \"\"\"Extract measurement type.\"\"\"\n", + " return session_id.split(\"++\")[1]\n", + "\n", + "def get_state(session_id: str) -> str:\n", + " \"\"\"Extract experimental state.\"\"\"\n", + " return session_id.split(\"++\")[3]\n", + "\n", + "def is_figure_number(session_id: str, figure_number: str) -> bool:\n", + " \"\"\"Check if data belongs to a specific figure.\"\"\"\n", + " return get_figure_number(session_id) == figure_number\n", + "\n", + "def is_measurement(session_id: str, measurement: str) -> bool:\n", + " \"\"\"Filter data by measurement/experiment type.\"\"\"\n", + " return get_measurement(session_id) == measurement\n", + "\n", + "def is_state(session_id: str, state: str) -> bool:\n", + " \"\"\"Filter data by disease/treatment state.\"\"\"\n", + " return get_state(session_id) == state" + ] + }, + { + "cell_type": "markdown", + "id": "event_detection_header", + "metadata": {}, + "source": [ + "### Event Detection Functions\n", + "\n", + "#### MAD-Based Event Detection\n", + "\n", + "We use **Median Absolute Deviation (MAD)** for robust noise estimation, avoiding bias from the events themselves. Events are detected as deviations >5SD from baseline and nearby events are merged to handle multi-threshold crossings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "event_detection_cell", + "metadata": {}, + "outputs": [], + "source": [ + "def merge_nearby_events(event_times, event_amplitudes, merge_distance_ms=1.0):\n", + " \"\"\"Merge events within merge_distance_ms, keeping maximum amplitude.\"\"\"\n", + " if len(event_times) == 0:\n", + " return [], []\n", + " times = np.array(event_times)\n", + " amplitudes = np.array(event_amplitudes)\n", + " order = np.argsort(times)\n", + " times = times[order]\n", + " amplitudes = amplitudes[order]\n", + " merged_times = []\n", + " merged_amplitudes = []\n", + " i = 0\n", + " while i < len(times):\n", + " current_time = times[i]\n", + " j = i + 1\n", + " max_amp = amplitudes[i]\n", + " max_time = times[i]\n", + " while j < len(times) and (times[j] - current_time) <= merge_distance_ms:\n", + " if amplitudes[j] > max_amp:\n", + " max_amp = float(amplitudes[j])\n", + " max_time = float(times[j])\n", + " j += 1\n", + " merged_times.append(float(max_time))\n", + " merged_amplitudes.append(float(max_amp))\n", + " i = j\n", + " return merged_times, merged_amplitudes\n", + "\n", + "def process_nwb_file_for_events(asset, detection_window_shift_ms=100, event_merge_distance_ms=1.0):\n", + " \"\"\"Process a single NWB file and return event amplitudes using intracellular_recordings index alignment.\"\"\"\n", + " s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n", + " file_system = remfile.File(s3_url)\n", + " file = h5py.File(file_system, mode=\"r\")\n", + " io = NWBHDF5IO(file=file, load_namespaces=True)\n", + " nwbfile = io.read()\n", + " try:\n", + " opto_df = nwbfile.intervals[\"optogenetic_epochs_table\"].to_dataframe()\n", + " det_rows = opto_df[opto_df[\"stage_name\"] == \"detection\"].sort_values(\"start_time\").reset_index(drop=True)\n", + " except Exception as e:\n", + " io.close(); file.close()\n", + " raise\n", + "\n", + " # Iterate intracellular_recordings (ordered) and align by index to detection rows\n", + " ice = nwbfile.get_intracellular_recordings()\n", + " n_rec = len(ice.id)\n", + " if len(det_rows) < n_rec:\n", + " n_rec = len(det_rows)\n", + "\n", + " file_positive_amplitudes = []\n", + " file_negative_amplitudes = []\n", + "\n", + " for i in range(n_rec):\n", + " row = ice[i]\n", + " resp_ref = row[(\"responses\", \"response\")].iloc[0]\n", + " ts = resp_ref.timeseries\n", + " # Extract slice\n", + " data_A = ts.data[resp_ref.idx_start : resp_ref.idx_start + resp_ref.count]\n", + " t_s_all = ts.get_timestamps()\n", + " t_s = t_s_all[resp_ref.idx_start : resp_ref.idx_start + resp_ref.count]\n", + "\n", + " data_pA = np.asarray(data_A) * 1e12\n", + " t_ms = np.asarray(t_s) * 1000.0 # absolute ms\n", + "\n", + " det = det_rows.iloc[i]\n", + " det_start_ms = float(det.start_time) * 1000.0 + detection_window_shift_ms\n", + " det_stop_ms = float(det.stop_time) * 1000.0\n", + "\n", + " m = (t_ms >= det_start_ms) & (t_ms <= det_stop_ms)\n", + " if not np.any(m):\n", + " continue\n", + " seg = data_pA[m]\n", + " seg_t = t_ms[m]\n", + "\n", + " noise_median = float(np.median(seg))\n", + " mad = float(np.median(np.abs(seg - noise_median)))\n", + " mad_std = mad * 1.4826\n", + "\n", + " thr_pos = noise_median + 5.0 * mad_std\n", + " thr_neg = noise_median - 5.0 * mad_std\n", + "\n", + " idx_pos = np.where(seg > thr_pos)[0]\n", + " idx_neg = np.where(seg < thr_neg)[0]\n", + "\n", + " t_pos_raw = seg_t[idx_pos]\n", + " t_neg_raw = seg_t[idx_neg]\n", + " a_pos_raw = seg[idx_pos] - noise_median\n", + " a_neg_raw = noise_median - seg[idx_neg]\n", + "\n", + " pos_t, pos_a = merge_nearby_events(t_pos_raw, a_pos_raw, event_merge_distance_ms)\n", + " neg_t, neg_a = merge_nearby_events(t_neg_raw, a_neg_raw, event_merge_distance_ms)\n", + "\n", + " file_positive_amplitudes.extend(pos_a)\n", + " file_negative_amplitudes.extend(neg_a)\n", + "\n", + " io.close()\n", + " file.close()\n", + "\n", + " return file_positive_amplitudes, file_negative_amplitudes\n" + ] + }, + { + "cell_type": "markdown", + "id": "data_loading_header", + "metadata": {}, + "source": [ + "### Load DANDI Dataset\n", + "\n", + "Connect to DANDI and filter for Figure 2 optogenetic experiments, separating OffState and OnState conditions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "data_loading_cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Load environment variables\n", + "load_dotenv()\n", + "token = os.getenv(\"DANDI_API_TOKEN\")\n", + "\n", + "# Connect to DANDI\n", + "dandiset_id = \"001538\"\n", + "client = DandiAPIClient(token=token)\n", + "client.authenticate(token=token)\n", + "\n", + "dandiset = client.get_dandiset(dandiset_id, \"draft\")\n", + "assets = dandiset.get_assets()\n", + "assets_list = list(assets)\n", + "\n", + "# Filter for Figure 2 oEPSC experiments\n", + "criteria_offstate = lambda asset: (is_figure_number(get_session_id(asset.path), \"F2\") and \n", + " is_measurement(get_session_id(asset.path), \"oEPSC\") and \n", + " is_state(get_session_id(asset.path), \"OffState\"))\n", + "\n", + "criteria_onstate = lambda asset: (is_figure_number(get_session_id(asset.path), \"F2\") and \n", + " is_measurement(get_session_id(asset.path), \"oEPSC\") and \n", + " is_state(get_session_id(asset.path), \"OnState\"))\n", + "\n", + "offstate_assets = [asset for asset in assets_list if criteria_offstate(asset)]\n", + "onstate_assets = [asset for asset in assets_list if criteria_onstate(asset)]\n", + "\n", + "print(f\"Found {len(offstate_assets)} OffState and {len(onstate_assets)} OnState files\")\n", + "print(f\"Total Figure 2 oEPSC files: {len(offstate_assets) + len(onstate_assets)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "data_processing_header", + "metadata": {}, + "source": [ + "## Data Processing and Event Detection\n", + "\n", + "### Process All NWB Files\n", + "\n", + "We process each NWB file to extract oEPSC events, collecting both:\n", + "- **Individual events**: For cumulative distribution analysis\n", + "- **File means**: For box plot comparison of mean responses per session" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "data_processing_cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize data collections\n", + "all_offstate_events = [] # All individual events for cumulative plot\n", + "all_onstate_events = []\n", + "\n", + "offstate_file_means = [] # Mean responses per file for box plot\n", + "onstate_file_means = []\n", + "\n", + "# Process OffState files\n", + "print(\"Processing OffState files...\")\n", + "for i, asset in enumerate(tqdm(offstate_assets, desc=\"OffState files\")):\n", + " session_id = get_session_id(asset.path)\n", + " print(f\" {i+1}/{len(offstate_assets)}: {session_id}\")\n", + " \n", + " pos_amps, neg_amps = process_nwb_file_for_events(asset)\n", + " all_events = pos_amps + neg_amps\n", + " \n", + " all_offstate_events.extend(all_events)\n", + " if len(all_events) > 0:\n", + " offstate_file_means.append(np.mean(all_events))\n", + "\n", + "# Process OnState files\n", + "print(\"\\nProcessing OnState files...\")\n", + "for i, asset in enumerate(tqdm(onstate_assets, desc=\"OnState files\")):\n", + " session_id = get_session_id(asset.path)\n", + " print(f\" {i+1}/{len(onstate_assets)}: {session_id}\")\n", + " \n", + " pos_amps, neg_amps = process_nwb_file_for_events(asset)\n", + " all_events = pos_amps + neg_amps\n", + " \n", + " all_onstate_events.extend(all_events)\n", + " if len(all_events) > 0:\n", + " onstate_file_means.append(np.mean(all_events))\n", + "\n", + "print(f\"\\nData collection complete:\")\n", + "print(f\" OffState: {len(all_offstate_events)} events from {len(offstate_file_means)} files\")\n", + "print(f\" OnState: {len(all_onstate_events)} events from {len(onstate_file_means)} files\")" + ] + }, + { + "cell_type": "markdown", + "id": "figure2g_header", + "metadata": {}, + "source": [ + "## Figure 2G: Cumulative Distribution Plot\n", + "\n", + "### Individual Event Analysis\n", + "\n", + "This plot shows the cumulative distribution of **all individual oEPSC events** across all experimental sessions, allowing comparison of the full event amplitude distributions between OffState and OnState conditions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "figure2g_cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Create cumulative distribution plot\n", + "fig, ax = plt.subplots(1, 1, figsize=(4, 4)) # Square aspect ratio like the reference\n", + "\n", + "# Convert to numpy arrays\n", + "offstate_amplitudes = np.array(all_offstate_events)\n", + "onstate_amplitudes = np.array(all_onstate_events)\n", + "\n", + "# Sort the data\n", + "offstate_sorted = np.sort(offstate_amplitudes)\n", + "onstate_sorted = np.sort(onstate_amplitudes)\n", + "\n", + "# Calculate cumulative probabilities as percentages\n", + "offstate_cumulative = np.arange(1, len(offstate_sorted) + 1) / len(offstate_sorted) * 100\n", + "onstate_cumulative = np.arange(1, len(onstate_sorted) + 1) / len(onstate_sorted) * 100\n", + "\n", + "# Plot with paper-style colors and thickness - match reference styling\n", + "ax.plot(offstate_sorted, offstate_cumulative, color='black', linewidth=3)\n", + "ax.plot(onstate_sorted, onstate_cumulative, color='gray', linewidth=3)\n", + "\n", + "# Formatting to match reference image\n", + "ax.set_xlabel('oEPSC amplitude (pA)', fontsize=12, fontweight='normal')\n", + "ax.set_ylabel('cumulative probability (%)', fontsize=12, fontweight='normal')\n", + "\n", + "# Set axis limits and ticks to match paper\n", + "ax.set_xlim(0, 80)\n", + "ax.set_ylim(0, 100)\n", + "ax.set_xticks([0, 20, 40, 60, 80])\n", + "ax.set_yticks([0, 20, 40, 60, 80, 100])\n", + "\n", + "# Style the axes to match reference\n", + "ax.spines['top'].set_visible(False)\n", + "ax.spines['right'].set_visible(False)\n", + "ax.spines['left'].set_linewidth(1.5)\n", + "ax.spines['bottom'].set_linewidth(1.5)\n", + "ax.tick_params(axis='both', which='major', labelsize=11, width=1.5, length=5)\n", + "\n", + "# Add labels with line markers in middle right\n", + "ax.plot([55, 58], [65, 65], color='black', linewidth=3)\n", + "ax.text(60, 65, 'off-state', fontsize=11, ha='left', va='center')\n", + "\n", + "ax.plot([55, 58], [55, 55], color='gray', linewidth=3)\n", + "ax.text(60, 55, 'on-state', fontsize=11, ha='left', va='center')\n", + "\n", + "# Calculate and display statistics\n", + "off_median = np.median(offstate_amplitudes)\n", + "on_median = np.median(onstate_amplitudes)\n", + "off_mean = np.mean(offstate_amplitudes)\n", + "on_mean = np.mean(onstate_amplitudes)\n", + "\n", + "print(\"=== FIGURE 2G: CUMULATIVE DISTRIBUTION ANALYSIS ===\")\n", + "print(f\"OffState events: {len(offstate_amplitudes)}\")\n", + "print(f\" Mean: {off_mean:.2f} ± {np.std(offstate_amplitudes):.2f} pA\")\n", + "print(f\" Median: {off_median:.2f} pA\")\n", + "print(f\" 25th percentile: {np.percentile(offstate_amplitudes, 25):.2f} pA\")\n", + "print(f\" 75th percentile: {np.percentile(offstate_amplitudes, 75):.2f} pA\")\n", + "\n", + "print(f\"\\nOnState events: {len(onstate_amplitudes)}\")\n", + "print(f\" Mean: {on_mean:.2f} ± {np.std(onstate_amplitudes):.2f} pA\")\n", + "print(f\" Median: {on_median:.2f} pA\")\n", + "print(f\" 25th percentile: {np.percentile(onstate_amplitudes, 25):.2f} pA\")\n", + "print(f\" 75th percentile: {np.percentile(onstate_amplitudes, 75):.2f} pA\")\n", + "\n", + "print(f\"\\nComparison:\")\n", + "print(f\" Mean fold change (OnState/OffState): {on_mean/off_mean:.3f}\")\n", + "print(f\" Median fold change: {on_median/off_median:.3f}\")\n", + "\n", + "# Kolmogorov-Smirnov test\n", + "ks_stat, ks_p = stats.ks_2samp(offstate_amplitudes, onstate_amplitudes)\n", + "print(f\"\\nKolmogorov-Smirnov test:\")\n", + "print(f\" KS statistic: {ks_stat:.4f}\")\n", + "print(f\" p-value: {ks_p:.2e}\")\n", + "print(f\" Significantly different: {'Yes' if ks_p < 0.05 else 'No'}\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "figure2h_header", + "metadata": {}, + "source": [ + "## Figure 2H: Box Plot Comparison\n", + "\n", + "### Mean Response Per Session Analysis\n", + "\n", + "This box plot compares the **mean event amplitudes per experimental session**, treating each NWB file as one data point. This approach controls for potential differences in the number of events recorded per session." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "figure2h_cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Create box plot\n", + "fig, ax = plt.subplots(1, 1, figsize=(3.5, 4.0))\n", + "\n", + "# Convert to numpy arrays\n", + "offstate_means = np.array(offstate_file_means)\n", + "onstate_means = np.array(onstate_file_means)\n", + "\n", + "# Prepare data for box plot\n", + "box_data = [offstate_means, onstate_means]\n", + "positions = [1, 2]\n", + "\n", + "# Create box plot with paper-style formatting\n", + "bp = ax.boxplot(box_data, positions=positions, patch_artist=True, \n", + " widths=0.4, showfliers=True, notch=False,\n", + " medianprops=dict(color='black', linewidth=2),\n", + " whiskerprops=dict(color='black', linewidth=1.5),\n", + " capprops=dict(color='black', linewidth=1.5),\n", + " flierprops=dict(marker='o', markersize=4, alpha=0.7, markerfacecolor='gray'))\n", + "\n", + "# Customize box colors to match paper (both white/light gray)\n", + "colors = ['white', 'white']\n", + "for patch, color in zip(bp['boxes'], colors):\n", + " patch.set_facecolor(color)\n", + " patch.set_edgecolor('black')\n", + " patch.set_linewidth(1.5)\n", + "\n", + "# Add individual data points as gray dots\n", + "for i, data in enumerate(box_data):\n", + " # Add some jitter for visibility\n", + " x_vals = np.random.normal(positions[i], 0.04, size=len(data))\n", + " ax.scatter(x_vals, data, color='gray', s=25, alpha=0.8, zorder=3)\n", + "\n", + "# Calculate statistics for significance annotation\n", + "off_mean = np.mean(offstate_means)\n", + "on_mean = np.mean(onstate_means)\n", + "\n", + "# Statistical test\n", + "u_stat, u_p = stats.mannwhitneyu(offstate_means, onstate_means, \n", + " alternative='two-sided')\n", + "\n", + "# Add significance annotation (**) at the top\n", + "y_max = max(np.max(offstate_means), np.max(onstate_means))\n", + "y_sig = y_max + 0.8\n", + "ax.text(1.5, y_sig, '**', ha='center', va='center', fontsize=16, fontweight='bold')\n", + "\n", + "# Formatting to match paper exactly\n", + "ax.set_xticks([1, 2])\n", + "ax.set_xticklabels(['off-state', 'on-state'], fontsize=14)\n", + "ax.set_ylabel('oEPSC amplitude (pA)', fontsize=14, fontweight='normal')\n", + "ax.set_title('Figure 2H: dSPN oEPSC Amplitude Comparison', fontsize=16, fontweight='bold', pad=15)\n", + "\n", + "# Style the axes\n", + "ax.spines['top'].set_visible(False)\n", + "ax.spines['right'].set_visible(False)\n", + "ax.spines['left'].set_linewidth(1.5)\n", + "ax.spines['bottom'].set_linewidth(1.5)\n", + "ax.tick_params(axis='both', which='major', labelsize=12, width=1.5, length=5)\n", + "ax.tick_params(axis='x', which='major', length=0) # Remove x-axis tick marks\n", + "\n", + "# Calculate and display statistics\n", + "off_median = np.median(offstate_means)\n", + "on_median = np.median(onstate_means)\n", + "\n", + "print(\"=== FIGURE 2H: BOX PLOT STATISTICAL ANALYSIS ===\")\n", + "print(f\"\\nOffState file means (n={len(offstate_means)}):\")\n", + "print(f\" Median: {off_median:.2f} pA\")\n", + "print(f\" Mean: {off_mean:.2f} ± {np.std(offstate_means):.2f} pA\")\n", + "print(f\" IQR: {np.percentile(offstate_means, 25):.2f} - {np.percentile(offstate_means, 75):.2f} pA\")\n", + "print(f\" Range: {np.min(offstate_means):.2f} - {np.max(offstate_means):.2f} pA\")\n", + "\n", + "print(f\"\\nOnState file means (n={len(onstate_means)}):\")\n", + "print(f\" Median: {on_median:.2f} pA\")\n", + "print(f\" Mean: {on_mean:.2f} ± {np.std(onstate_means):.2f} pA\") \n", + "print(f\" IQR: {np.percentile(onstate_means, 25):.2f} - {np.percentile(onstate_means, 75):.2f} pA\")\n", + "print(f\" Range: {np.min(onstate_means):.2f} - {np.max(onstate_means):.2f} pA\")\n", + "\n", + "print(f\"\\nComparison:\")\n", + "print(f\" Median fold change: {on_median/off_median:.3f}\")\n", + "print(f\" Mean fold change: {on_mean/off_mean:.3f}\")\n", + "print(f\" Difference in medians: {on_median - off_median:.2f} pA\")\n", + "print(f\" Difference in means: {on_mean - off_mean:.2f} pA\")\n", + "\n", + "print(f\"\\nMann-Whitney U test (non-parametric):\")\n", + "print(f\" U statistic: {u_stat:.2f}\")\n", + "print(f\" p-value: {u_p:.2e}\")\n", + "print(f\" Significantly different: {'Yes' if u_p < 0.05 else 'No'}\")\n", + "\n", + "# Welch's t-test (unequal variances)\n", + "t_stat, t_p = stats.ttest_ind(offstate_means, onstate_means, \n", + " equal_var=False)\n", + "print(f\"\\nWelch's t-test (unequal variances):\")\n", + "print(f\" t statistic: {t_stat:.4f}\")\n", + "print(f\" p-value: {t_p:.2e}\")\n", + "print(f\" Significantly different: {'Yes' if t_p < 0.05 else 'No'}\")\n", + "\n", + "# Effect size (Cohen's d)\n", + "pooled_std = np.sqrt(((len(offstate_means)-1)*np.var(offstate_means) + \n", + " (len(onstate_means)-1)*np.var(onstate_means)) / \n", + " (len(offstate_means) + len(onstate_means) - 2))\n", + "cohens_d = (on_mean - off_mean) / pooled_std\n", + "print(f\"\\nEffect size (Cohen's d): {cohens_d:.3f}\")\n", + "\n", + "if abs(cohens_d) < 0.2:\n", + " effect_size = \"negligible\"\n", + "elif abs(cohens_d) < 0.5:\n", + " effect_size = \"small\"\n", + "elif abs(cohens_d) < 0.8:\n", + " effect_size = \"medium\"\n", + "else:\n", + " effect_size = \"large\"\n", + "print(f\" Effect size interpretation: {effect_size}\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "summary_header", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "### Key Findings\n", + "\n", + "This analysis reproduces the key findings from **Figure 2G-H** of Zhai et al. 2025:\n", + "\n", + "1. **Cumulative Distribution (Figure 2G)**: Shows the distribution of all individual oEPSC events across experimental conditions\n", + "2. **Box Plot Comparison (Figure 2H)**: Compares mean event amplitudes per experimental session, controlling for session-to-session variability\n", + "\n", + "### Methodological Notes\n", + "\n", + "- **Event Detection**: MAD-based noise estimation with ±5SD threshold\n", + "- **Artifact Avoidance**: 100ms detection window shift to avoid stimulation artifacts\n", + "- **Event Merging**: 1ms window to handle multi-threshold crossings from single events\n", + "- **Statistical Testing**: Both parametric and non-parametric tests for robust comparison\n", + "\n", + "### Biological Significance\n", + "\n", + "The analysis reveals how L-DOPA treatment (OnState) affects optogenetically-evoked synaptic responses in striatal neurons, providing insights into the synaptic mechanisms underlying levodopa-induced dyskinesia in Parkinson's disease." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "surmeier-lab-to-nwb", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/001538/catalyst_neuro/zhai_2025/figure_5F_acetylcholine_biosensor.ipynb b/001538/catalyst_neuro/zhai_2025/figure_5F_acetylcholine_biosensor.ipynb new file mode 100644 index 0000000..0f68485 --- /dev/null +++ b/001538/catalyst_neuro/zhai_2025/figure_5F_acetylcholine_biosensor.ipynb @@ -0,0 +1,705 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "header_cell", + "metadata": {}, + "source": [ + "# Reproducing Figure 5F: GRABACh3.0 Acetylcholine Biosensor Analysis\n", + "\n", + "This notebook reproduces Figure 5F from **Zhai et al. 2025** analyzing acetylcholine release dynamics using the GRABACh3.0 biosensor across different experimental conditions in a Parkinson's disease model.\n", + "\n", + "**Dataset**: DANDI:001538 - State-dependent modulation of spiny projection neurons controls levodopa-induced dyskinesia\n", + "\n", + "**Analysis approach**:\n", + "- **Biosensor**: GRABACh3.0 genetically encoded acetylcholine indicator\n", + "- **Stimulation**: Single-pulse electrical stimulation to evoke acetylcholine release\n", + "- **Normalization**: Using calibration trials (acetylcholine and TTX)\n", + "- **Quantification**: Area under curve (AUC) of normalized fluorescence response\n", + "- **Conditions**: Control (UL control), Parkinsonian (6-OHDA), Dyskinetic off-state (LID off-state)\n", + "- **Treatments**: Control, dopamine, quinpirole, sulpiride per condition" + ] + }, + { + "cell_type": "markdown", + "id": "setup_header", + "metadata": {}, + "source": [ + "## Setup and Data Loading\n", + "\n", + "### Import Libraries and Configure Plotting Style\n", + "\n", + "We use the same plotting parameters as the original publication to ensure visual consistency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "imports_cell", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from typing import Dict, List, Tuple\n", + "\n", + "import h5py\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import remfile\n", + "import seaborn as sns\n", + "from dandi.dandiapi import DandiAPIClient\n", + "from dotenv import load_dotenv\n", + "from pynwb import NWBHDF5IO\n", + "from tqdm import tqdm\n", + "\n", + "# Set plotting style to match paper\n", + "plt.style.use('default')\n", + "sns.set_palette(\"Set2\")\n", + "\n", + "def setup_figure_style():\n", + " \"\"\"Setup matplotlib parameters to match paper style\"\"\"\n", + " plt.rcParams.update({\n", + " 'font.size': 8,\n", + " 'axes.titlesize': 10,\n", + " 'axes.labelsize': 9,\n", + " 'xtick.labelsize': 8,\n", + " 'ytick.labelsize': 8,\n", + " 'legend.fontsize': 8,\n", + " 'figure.titlesize': 12,\n", + " 'axes.linewidth': 0.8,\n", + " 'axes.spines.top': False,\n", + " 'axes.spines.right': False,\n", + " 'xtick.major.width': 0.8,\n", + " 'ytick.major.width': 0.8,\n", + " 'xtick.minor.width': 0.6,\n", + " 'ytick.minor.width': 0.6,\n", + " })\n", + "\n", + "setup_figure_style()\n", + "print(\"Libraries imported and plotting style configured\")" + ] + }, + { + "cell_type": "markdown", + "id": "utilities_header", + "metadata": {}, + "source": [ + "### Session ID Parsing and Filtering Functions\n", + "\n", + "These utility functions parse the rich metadata encoded in DANDI file paths and filter experiments by figure, measurement type, and experimental condition." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "utilities_cell", + "metadata": {}, + "outputs": [], + "source": [ + "def get_session_id(asset_path: str) -> str:\n", + " \"\"\"Extract session ID from DANDI asset path.\"\"\"\n", + " if not asset_path:\n", + " return \"\"\n", + " bottom_level_path = asset_path.split(\"/\")[1]\n", + " session_id_with_ses_prefix = bottom_level_path.split(\"_\")[1]\n", + " session_id = session_id_with_ses_prefix.split(\"-\")[1]\n", + " return session_id\n", + "\n", + "def get_figure_number(session_id: str):\n", + " \"\"\"Extract which figure this data corresponds to.\"\"\"\n", + " return session_id.split(\"+\"+\"+\")[0]\n", + "\n", + "def get_measurement(session_id: str) -> str:\n", + " \"\"\"Extract measurement type.\"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"+\"+\"+\")[1]\n", + "\n", + "def get_cell_type(session_id: str) -> str:\n", + " \"\"\"Extract cell type.\"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"+\"+\"+\")[2]\n", + "\n", + "def get_state(session_id: str) -> str:\n", + " \"\"\"Extract experimental state.\"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"+\"+\"+\")[3]\n", + "\n", + "def is_f5_achfp(session_id: str) -> bool:\n", + " \"\"\"Check if data belongs to Figure 5 acetylcholine fluorescent protein experiments.\"\"\"\n", + " return get_figure_number(session_id) == \"F5\" and get_measurement(session_id) == \"AChFP\"\n", + "\n", + "def get_condition_label(session_id: str) -> str:\n", + " \"\"\"Convert session metadata to condition label.\"\"\"\n", + " parts = session_id.split(\"+\"+\"+\")\n", + " if len(parts) < 4:\n", + " return \"unknown\"\n", + " \n", + " state = parts[3]\n", + " \n", + " if state == \"CTRL\":\n", + " return \"UL control\"\n", + " elif state == \"PD\":\n", + " return \"6-OHDA\"\n", + " elif state == \"OFF\":\n", + " return \"LID off-state\"\n", + " else:\n", + " return \"unknown\"\n", + "\n", + "print(\"Utility functions defined\")" + ] + }, + { + "cell_type": "markdown", + "id": "analysis_functions_header", + "metadata": {}, + "source": [ + "### Fluorescence Analysis Functions\n", + "\n", + "#### GRABACh3.0 Signal Processing\n", + "\n", + "We process acetylcholine biosensor fluorescence data following the methodology from the original analysis, including baseline normalization and AUC calculation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "analysis_functions_cell", + "metadata": {}, + "outputs": [], + "source": [ + "def process_acetylcholine_trial(\n", + " timestamps: np.ndarray,\n", + " fluorescence: np.ndarray,\n", + " stim_time: float = 10.0,\n", + ") -> Dict:\n", + " \"\"\"Process a single acetylcholine biosensor trial.\"\"\"\n", + " # Calculate stimulus-relative windows\n", + " baseline_window = (stim_time - 1.0, stim_time)\n", + " response_window = (stim_time, stim_time + 2.0)\n", + "\n", + " # Find indices for time windows\n", + " time_interval = timestamps[1] - timestamps[0]\n", + " idx_baseline_start = np.argmin(np.abs(timestamps - baseline_window[0]))\n", + " idx_baseline_end = np.argmin(np.abs(timestamps - baseline_window[1]))\n", + " idx_response_start = np.argmin(np.abs(timestamps - response_window[0]))\n", + " idx_response_end = np.argmin(np.abs(timestamps - response_window[1]))\n", + "\n", + " # Calculate F0 (baseline fluorescence)\n", + " F0 = float(np.mean(fluorescence[idx_baseline_start:idx_baseline_end]))\n", + "\n", + " # Calculate dF/F0\n", + " dF_over_F0 = (fluorescence - F0) / F0 if F0 > 0 else np.zeros_like(fluorescence)\n", + "\n", + " # Unnormalized AUC (optional diagnostic)\n", + " auc = float(np.sum(dF_over_F0[idx_response_start:idx_response_end]) * time_interval)\n", + "\n", + " # Peak response\n", + " peak_response = float(np.max(dF_over_F0[idx_response_start:idx_response_end]))\n", + "\n", + " return {\n", + " \"F0\": F0,\n", + " \"dF_over_F0\": dF_over_F0,\n", + " \"auc\": auc,\n", + " \"peak_response\": peak_response,\n", + " \"timestamps\": timestamps,\n", + " \"fluorescence\": fluorescence,\n", + " }\n", + "\n", + "def get_calibration_values(trials_data: List[Dict]) -> Tuple[float, float]:\n", + " \"\"\"Extract Fmax and Fmin using full-trace calibration averages with background subtraction.\"\"\"\n", + " ach_trials = [t for t in trials_data if t[\"treatment\"] == \"ACh_calibration\"]\n", + " ttx_trials = [t for t in trials_data if t[\"treatment\"] == \"TTX_calibration\"]\n", + "\n", + " if ach_trials and ttx_trials:\n", + " bg_pmt = 162.0\n", + " ach_vals = []\n", + " for t in ach_trials:\n", + " ach_vals.extend((t[\"fluorescence\"] - bg_pmt).tolist())\n", + " ttx_vals = []\n", + " for t in ttx_trials:\n", + " ttx_vals.extend((t[\"fluorescence\"] - bg_pmt).tolist())\n", + " if ach_vals and ttx_vals:\n", + " return float(np.mean(ach_vals)), float(np.mean(ttx_vals))\n", + "\n", + " # Fallback constants (from original script)\n", + " return 493.47, 212.39\n", + "\n", + "def normalize_fluorescence(trial_data: Dict, Fmax: float, Fmin: float, stim_time: float = 10.0) -> Dict:\n", + " \"\"\"Normalize fluorescence; compute AUC via trapezoidal integration in [stim, stim+2s].\"\"\"\n", + " FI = Fmax - Fmin\n", + " dF_over_FI = (trial_data[\"fluorescence\"] - trial_data[\"F0\"]) / FI if FI > 0 else np.zeros_like(trial_data[\"fluorescence\"])\n", + " trial_data[\"dF_over_FI\"] = dF_over_FI\n", + "\n", + " # Response window indices\n", + " idx_start = np.argmin(np.abs(trial_data[\"timestamps\"] - stim_time))\n", + " idx_end = np.argmin(np.abs(trial_data[\"timestamps\"] - (stim_time + 2.0)))\n", + " rs = dF_over_FI[idx_start:idx_end]\n", + " ts = trial_data[\"timestamps\"][idx_start:idx_end]\n", + "\n", + " if len(rs) > 1:\n", + " trial_data[\"auc_normalized\"] = float(np.trapezoid(rs, ts))\n", + " else:\n", + " trial_data[\"auc_normalized\"] = 0.0\n", + "\n", + " trial_data[\"peak_normalized\"] = float(np.max(rs)) if len(rs) > 0 else 0.0\n", + " return trial_data\n", + "print(\"Analysis functions defined (trapz + full-trace calibration)\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "data_loading_header", + "metadata": {}, + "source": [ + "### Load DANDI Dataset\n", + "\n", + "Connect to DANDI and filter for Figure 5 acetylcholine biosensor experiments across all three experimental conditions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "data_loading_cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Load environment variables\n", + "load_dotenv()\n", + "token = os.getenv(\"DANDI_API_TOKEN\")\n", + "\n", + "# Connect to DANDI\n", + "dandiset_id = \"001538\"\n", + "client = DandiAPIClient(token=token)\n", + "client.authenticate(token=token)\n", + "\n", + "dandiset = client.get_dandiset(dandiset_id, \"draft\")\n", + "assets = dandiset.get_assets()\n", + "assets_list = list(assets)\n", + "\n", + "# Filter for Figure 5 acetylcholine fluorescent protein experiments\n", + "f5_achfp_assets = [asset for asset in assets_list if is_f5_achfp(get_session_id(asset.path))]\n", + "\n", + "print(f\"Found {len(f5_achfp_assets)} Figure 5 acetylcholine biosensor files\")\n", + "\n", + "# Show breakdown by condition\n", + "condition_counts = {}\n", + "for asset in f5_achfp_assets:\n", + " condition = get_condition_label(get_session_id(asset.path))\n", + " condition_counts[condition] = condition_counts.get(condition, 0) + 1\n", + "\n", + "print(\"\\nBreakdown by condition:\")\n", + "for condition, count in condition_counts.items():\n", + " print(f\" {condition}: {count} files\")" + ] + }, + { + "cell_type": "markdown", + "id": "data_processing_header", + "metadata": {}, + "source": [ + "## Data Processing and Analysis\n", + "\n", + "### Process All NWB Files\n", + "\n", + "We process each NWB file to extract acetylcholine biosensor data, analyzing:\n", + "- **Fluorescence time series**: Raw GRABACh3.0 biosensor signals\n", + "- **Calibration values**: Fmax (acetylcholine) and Fmin (TTX) for normalization\n", + "- **Stimulus responses**: AUC calculations for evoked acetylcholine release\n", + "- **Treatment effects**: Dopamine, quinpirole, and sulpiride modulation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "data_processing_cell", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize data collection\n", + "all_trial_data = [] # All individual trials for analysis\n", + "stimulation_type = \"single_pulse\" # Focus on single pulse stimulation\n", + "\n", + "print(f\"Processing Figure 5 acetylcholine biosensor files for {stimulation_type} stimulation...\\n\")\n", + "\n", + "for i, asset in enumerate(tqdm(f5_achfp_assets, desc=\"Processing F5 AChFP files\")):\n", + " session_id = get_session_id(asset.path)\n", + " condition = get_condition_label(session_id)\n", + " \n", + " # Update progress with current file info\n", + " tqdm.write(f\" {i+1}/{len(f5_achfp_assets)}: {condition} - {session_id}\")\n", + " \n", + " # Open NWB file from DANDI\n", + " s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n", + " file_system = remfile.File(s3_url)\n", + " file = h5py.File(file_system, mode=\"r\")\n", + " io = NWBHDF5IO(file=file, load_namespaces=True)\n", + " nwbfile = io.read()\n", + " \n", + " # Get trials table and fluorescence module\n", + " trials_df = nwbfile.trials.to_dataframe()\n", + " fluorescence_module = nwbfile.processing[\"ophys\"][\"Fluorescence\"]\n", + " \n", + " # Process each trial\n", + " trials_data = []\n", + " for _, trial in trials_df.iterrows():\n", + " # Get the corresponding fluorescence series\n", + " series_name = trial['roi_series_name']\n", + " series = fluorescence_module.roi_response_series[series_name]\n", + " \n", + " # Only process GRABCh channel (acetylcholine sensor)\n", + " if \"GRABCh\" not in series_name:\n", + " continue\n", + "\n", + " # Map treatment names\n", + " treatment_mapping = {\n", + " 'control': 'control',\n", + " '50nM_dopamine': 'dopamine',\n", + " 'quinpirole': 'quinpirole', \n", + " 'sulpiride': 'sulpiride',\n", + " 'TTX_calibration': 'TTX_calibration',\n", + " 'acetylcholine_calibration': 'ACh_calibration'\n", + " }\n", + " \n", + " treatment = treatment_mapping.get(trial['treatment'], trial['treatment'])\n", + " stimulation = 'calibration' if 'calibration' in treatment else trial['stimulation']\n", + "\n", + " fluorescence = series.data[:]\n", + " timestamps = series.get_timestamps()\n", + " # Convert to relative timestamps starting at 0\n", + " timestamps = timestamps - timestamps[0]\n", + "\n", + " # Calculate relative stimulus time\n", + " abs_stim_time = trial['stimulus_start_time']\n", + " orig_start = series.get_timestamps()[0]\n", + " stim_time = abs_stim_time - orig_start\n", + "\n", + " trials_data.append({\n", + " \"condition\": condition,\n", + " \"treatment\": treatment,\n", + " \"stimulation\": stimulation,\n", + " \"subject_id\": nwbfile.subject.subject_id,\n", + " \"session_id\": nwbfile.identifier,\n", + " \"series_name\": series_name,\n", + " \"fluorescence\": fluorescence,\n", + " \"timestamps\": timestamps,\n", + " \"stim_time\": stim_time,\n", + " \"file\": asset.path.split('/')[-1]\n", + " })\n", + "\n", + " # Get calibration values for this file\n", + " Fmax, Fmin = get_calibration_values(trials_data)\n", + " \n", + " # Process each trial\n", + " for trial in trials_data:\n", + " # Skip calibration trials for experimental analysis\n", + " if trial[\"treatment\"] in [\"ACh_calibration\", \"TTX_calibration\"]:\n", + " continue\n", + " \n", + " # Skip trials that don't match the requested stimulation type\n", + " if trial[\"stimulation\"] != stimulation_type:\n", + " continue\n", + "\n", + " # Process trial\n", + " trial_data = process_acetylcholine_trial(trial[\"timestamps\"], trial[\"fluorescence\"], trial[\"stim_time\"])\n", + "\n", + " # Normalize\n", + " trial_data = normalize_fluorescence(trial_data, Fmax, Fmin, trial[\"stim_time\"])\n", + "\n", + " # Store results\n", + " all_trial_data.append({\n", + " \"condition\": trial[\"condition\"],\n", + " \"treatment\": trial[\"treatment\"],\n", + " \"subject_id\": trial[\"subject_id\"],\n", + " \"session_id\": trial[\"session_id\"],\n", + " \"series_name\": trial[\"series_name\"],\n", + " \"F0\": trial_data[\"F0\"],\n", + " \"auc_unnormalized\": trial_data[\"auc\"],\n", + " \"auc_normalized\": trial_data[\"auc_normalized\"],\n", + " \"peak_unnormalized\": trial_data[\"peak_response\"],\n", + " \"peak_normalized\": trial_data[\"peak_normalized\"],\n", + " \"file\": trial[\"file\"],\n", + " })\n", + " \n", + " tqdm.write(f\" Processed {len(trials_data)} trials\")\n", + " \n", + " # Close file\n", + " io.close()\n", + " file.close()\n", + "\n", + "# Create DataFrame\n", + "df = pd.DataFrame(all_trial_data)\n", + "\n", + "print(f\"\\nData processing complete:\")\n", + "print(f\" Total measurements: {len(df)}\")\n", + "print(f\" Experimental conditions: {df['condition'].nunique()}\")\n", + "\n", + "print(\"\\nMeasurement breakdown by condition and treatment:\")\n", + "for condition in df['condition'].unique():\n", + " condition_data = df[df['condition'] == condition]\n", + " print(f\" {condition}: {len(condition_data)} total measurements\")\n", + " for treatment in condition_data['treatment'].unique():\n", + " treatment_data = condition_data[condition_data['treatment'] == treatment]\n", + " print(f\" {treatment}: n={len(treatment_data)}\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "boxplot_header", + "metadata": {}, + "source": [ + "## Figure 5F: Box Plot Visualization\n", + "\n", + "### Acetylcholine Release Dynamics Across Conditions\n", + "\n", + "This plot reproduces Figure 5F showing normalized acetylcholine release (AUC) across three experimental conditions with pharmacological modulation. The analysis reveals how Parkinson's disease pathology and levodopa treatment affect cholinergic signaling dynamics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "boxplot_cell", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "# Collapse to per-file averages (first two trials per file×treatment)\n", + "\n", + "plot_df = df[df[\"treatment\"].isin([\"control\", \"dopamine\", \"quinpirole\",\n", + "\"sulpiride\"])].copy()\n", + "plot_df = plot_df.dropna(subset=[\"auc_normalized\"]).reset_index().rename(columns={\"index\":\n", + "\"_row_order\"})\n", + "\n", + "# Avoid GroupBy.apply deprecation by sorting then using head(2)\n", + "\n", + "plot_df_sorted = plot_df.sort_values([\"condition\", \"treatment\", \"file\", \"_row_order\"])\n", + "first_two = (\n", + " plot_df_sorted.groupby([\"condition\", \"treatment\", \"file\"], sort=False,\n", + "as_index=False)\n", + " .head(2)\n", + ")\n", + "\n", + "collapsed = (\n", + " first_two.groupby([\"condition\", \"treatment\", \"file\"], as_index=False)\n", + "[\"auc_normalized\"].mean()\n", + ")\n", + "\n", + "# Create Figure 5F style plot (no function)\n", + "\n", + "fig, ax = plt.subplots(1, 1, figsize=(10, 6))\n", + "\n", + "# Conditions and labels (top)\n", + "\n", + "conditions = [\"UL control\", \"6-OHDA\", \"LID off-state\"]\n", + "condition_labels = [\"control\", \"6-OHDA\", \"LID off-state\"]\n", + "\n", + "# Treatments by condition (bottom)\n", + "\n", + "treatments_by_condition = {\n", + " \"UL control\": [\"control\", \"quinpirole\", \"sulpiride\"],\n", + " \"6-OHDA\": [\"control\", \"dopamine\", \"quinpirole\", \"sulpiride\"],\n", + " \"LID off-state\": [\"control\", \"dopamine\", \"quinpirole\", \"sulpiride\"],\n", + "}\n", + "\n", + "# Display labels for treatments\n", + "\n", + "display_label = {\n", + " \"control\": \"control\",\n", + " \"dopamine\": \"+DA\",\n", + " \"quinpirole\": \"+quinpirole\",\n", + " \"sulpiride\": \"+sulpiride\",\n", + "}\n", + "\n", + "# Colors to match paper: black (UL control), red (6-OHDA), blue (LID off)\n", + "\n", + "condition_colors = [\"black\", \"red\", \"blue\"]\n", + "\n", + "# Build dynamic x-positions per group\n", + "\n", + "group_gap = 1\n", + "current_pos = 1\n", + "x_positions = []\n", + "x_labels = []\n", + "group_bounds = [] # (start_pos, end_pos) per condition\n", + "\n", + "all_data = []\n", + "all_colors = []\n", + "\n", + "for i, condition in enumerate(conditions):\n", + " cond_df = collapsed[collapsed[\"condition\"] == condition]\n", + " start_pos = current_pos\n", + " treatments = treatments_by_condition[condition]\n", + "\n", + " for t in treatments:\n", + " series = cond_df[cond_df[\"treatment\"] == t][\"auc_normalized\"]\n", + " all_data.append(series.values if not series.empty else np.array([]))\n", + " all_colors.append(condition_colors[i])\n", + " x_positions.append(current_pos)\n", + " x_labels.append(display_label[t])\n", + " current_pos += 1\n", + "\n", + " end_pos = current_pos - 1\n", + " group_bounds.append((start_pos, end_pos))\n", + " current_pos += group_gap # gap before next condition\n", + "\n", + "# Create box plots\n", + "\n", + "box_plot = ax.boxplot(\n", + " all_data,\n", + " positions=x_positions,\n", + " patch_artist=True,\n", + " boxprops=dict(linewidth=1.5),\n", + " whiskerprops=dict(linewidth=1.5),\n", + " capprops=dict(linewidth=1.5),\n", + " medianprops=dict(color=\"black\", linewidth=2),\n", + " flierprops=dict(marker=\"o\", markersize=3, markeredgecolor=\"black\", alpha=0.7),\n", + " widths=0.6,\n", + ")\n", + "\n", + "# Color boxes and add points\n", + "\n", + "for i, (patch, color) in enumerate(zip(box_plot[\"boxes\"], all_colors)):\n", + " if color == \"black\":\n", + " patch.set_facecolor(\"white\")\n", + " elif color == \"red\":\n", + " patch.set_facecolor(\"#ffcccc\") # Light red\n", + " else:\n", + " patch.set_facecolor(\"#ccccff\") # Light blue\n", + " patch.set_edgecolor(color)\n", + " patch.set_linewidth(1.5)\n", + "\n", + " if len(all_data[i]) > 0:\n", + " x_pos = np.random.normal(x_positions[i], 0.05, len(all_data[i]))\n", + " ax.scatter(x_pos, all_data[i], color=color, alpha=0.7, s=20, zorder=3)\n", + "\n", + "# Y-axis styling\n", + "\n", + "ax.set_ylabel(\"AUC (normalized ΔF/F0*s)\", fontsize=12, fontweight=\"bold\")\n", + "ax.set_ylim(-0.05, 0.7)\n", + "ax.set_yticks([0, 0.2, 0.4, 0.6])\n", + "\n", + "# Dotted baseline at y=0\n", + "\n", + "ax.axhline(y=0, color=\"black\", linestyle=\":\", alpha=0.8, linewidth=1.0)\n", + "\n", + "# X-limits\n", + "\n", + "first_pos = group_bounds[0][0]\n", + "last_pos = group_bounds[-1][1]\n", + "ax.set_xlim(first_pos - 0.5, last_pos + 0.5)\n", + "\n", + "# Bottom: treatment labels\n", + "\n", + "ax.set_xticks(x_positions)\n", + "ax.set_xticklabels(x_labels, rotation=45, ha=\"right\", fontsize=9)\n", + "\n", + "# Top: condition labels centered above groups\n", + "\n", + "group_centers = [0.5 * (s + e) for (s, e) in group_bounds]\n", + "ax_top = ax.twiny()\n", + "ax_top.set_xlim(ax.get_xlim())\n", + "ax_top.set_xticks(group_centers)\n", + "ax_top.set_xticklabels(condition_labels, fontsize=11, fontweight=\"bold\")\n", + "ax_top.tick_params(axis=\"x\", which=\"major\", pad=0)\n", + "for spine in ax_top.spines.values():\n", + " spine.set_visible(False)\n", + "\n", + "# Separators and shading\n", + "\n", + "for (start, end) in group_bounds[:-1]:\n", + " ax.axvline(x=end + 0.5, color=\"lightgray\", linestyle=\"-\", alpha=0.5, linewidth=1)\n", + "\n", + "shade_colors = [\"lightgray\", \"lightcoral\", \"lightblue\"]\n", + "for (start, end), shade in zip(group_bounds, shade_colors):\n", + " ax.axvspan(start - 0.5, end + 0.5, alpha=0.1, color=shade, zorder=0)\n", + "\n", + "# Final styling\n", + "\n", + "ax.spines[\"top\"].set_visible(False)\n", + "ax.spines[\"right\"].set_visible(False)\n", + "ax.spines[\"left\"].set_linewidth(1.5)\n", + "ax.spines[\"bottom\"].set_linewidth(1.5)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Summary (collapsed per-file, control only)\n", + "\n", + "print(\"=== FIGURE 5F: ACETYLCHOLINE RELEASE ANALYSIS ===\")\n", + "control_collapsed = collapsed[collapsed[\"treatment\"] == \"control\"]\n", + "print(\"\\nSummary by Condition (Control Treatment Only):\")\n", + "for condition in [\"UL control\", \"6-OHDA\", \"LID off-state\"]:\n", + " cd = control_collapsed[control_collapsed[\"condition\"] == condition]\n", + " auc_vals = cd[\"auc_normalized\"]\n", + " files = cd[\"file\"].nunique()\n", + " print(f\" {condition}: n={len(auc_vals)} measurements from {files} files\")\n", + " if len(auc_vals) > 0:\n", + " print(f\" Mean ± SEM: {auc_vals.mean():.3f} ± {auc_vals.sem():.3f}\")\n", + " print(f\" Median: {auc_vals.median():.3f}\")\n", + " print(f\" Range: {auc_vals.min():.3f} - {auc_vals.max():.3f}\")\n", + "\n", + "print(f\"\\nTotal per-file datapoints (all treatments): {len(collapsed)}\")\n", + "print(f\"Conditions analyzed: {collapsed['condition'].nunique()}\")\n", + "print(f\"Unique files processed: {collapsed['file'].nunique()}\")" + ] + }, + { + "cell_type": "markdown", + "id": "summary_header", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "### Key Findings\n", + "\n", + "This analysis reproduces the key findings from **Figure 5F** of Zhai et al. 2025:\n", + "\n", + "1. **Acetylcholine Release Dynamics**: GRABACh3.0 biosensor measurements reveal condition-dependent changes in cholinergic signaling\n", + "2. **Parkinson's Disease Effects**: 6-OHDA lesions alter baseline acetylcholine release patterns\n", + "3. **Dyskinetic State Changes**: LID off-state shows distinct acetylcholine dynamics compared to control\n", + "4. **Pharmacological Modulation**: Dopamine, quinpirole, and sulpiride treatments reveal receptor-specific effects\n", + "\n", + "### Methodological Notes\n", + "\n", + "- **Biosensor**: GRABACh3.0 genetically encoded acetylcholine indicator\n", + "- **Imaging**: Two-photon microscopy at 920nm excitation, 520nm emission\n", + "- **Stimulation**: Single-pulse electrical stimulation to evoke acetylcholine release\n", + "- **Normalization**: Calibration using acetylcholine (Fmax) and TTX (Fmin) trials\n", + "- **Quantification**: Area under curve (AUC) of normalized fluorescence response\n", + "- **Time Windows**: Baseline (stim_time-1s to stim_time), Response (stim_time to stim_time+2s)\n", + "\n", + "### Biological Significance\n", + "\n", + "The analysis reveals how Parkinson's disease pathology and levodopa-induced dyskinesia affect striatal acetylcholine signaling, providing insights into the cholinergic mechanisms underlying motor control and dysfunction in Parkinson's disease." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "surmeier-lab-to-nwb", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/001538/catalyst_neuro/zhai_2025/how_to_use_this_dataset.ipynb b/001538/catalyst_neuro/zhai_2025/how_to_use_this_dataset.ipynb new file mode 100644 index 0000000..e3a843f --- /dev/null +++ b/001538/catalyst_neuro/zhai_2025/how_to_use_this_dataset.ipynb @@ -0,0 +1,1658 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fd2ba77f", + "metadata": {}, + "source": [ + "# State-dependent modulation of spiny projection neurons controlslevodopa-induced dyskinesia in a mouse model of Parkinson’s disease\n", + "\n", + "**This Dataset (DANDI:001538):**\n", + "This dataset contains neurophysiology data from the Surmeier lab studying levodopa-induced dyskinesia (LID) in Parkinson's disease models. The data spans multiple experimental approaches:\n", + "\n", + "- **Electrophysiology**: Patch-clamp recordings from striatal neurons (dSPNs and iSPNs)\n", + "- **Two-photon imaging**: Dendritic excitability and spine density measurements \n", + "- **Behavioral analysis**: Abnormal involuntary movements (AIMs) and rotational behavior\n", + "- **Pharmacology**: Effects of various receptor agonists and antagonists\n", + "- **Optogenetics**: Selective stimulation of specific neuron populations" + ] + }, + { + "cell_type": "markdown", + "id": "1ba0f2bb", + "metadata": {}, + "source": [ + "One of the nice features of dandi is that the data can be streamed directly anywhere from the world without download (altought data can be loaded as well). To enable this stremaing workflow we need to instantiate a dandi client.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b0d0a4a", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from dandi.dandiapi import DandiAPIClient\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from .env file\n", + "# This is required at the moment because the data is on draft status\n", + "load_dotenv()\n", + "# Load token from environment variable\n", + "token = os.getenv(\"DANDI_API_TOKEN\")\n", + "if not token:\n", + " raise ValueError(\"DANDI_API_TOKEN environment variable not set. Please set it with your DANDI API token.\")\n", + "\n", + "dandiset_id = \"001538\"\n", + "client = DandiAPIClient(token=token)\n", + "client.authenticate(token=token)\n", + "\n", + "dandiset = client.get_dandiset(dandiset_id, \"draft\")\n", + "assets = dandiset.get_assets()\n", + "assets_list = list(assets)" + ] + }, + { + "cell_type": "markdown", + "id": "d8ae805c", + "metadata": {}, + "source": [ + "## Fetching data\n", + "\n", + "Dandisets contain **Assets** which are individual data files within the dataset. For this project, each asset represents one NWB file containing:\n", + "- **One experimental session** from **one subject** (animal)\n", + "- All data streams recorded during that session (voltage, imaging, behavior)\n", + "- Complete experimental metadata and protocols\n", + "- Standardized data organization following NWB format\n", + "\n", + "In general, assets can be fetched by an id or by the path. The path of an NWB file (asset) on DANDI is of the form:\n", + "\n", + "`sub-/sub-_ses-_[desc-]_.nwb`\n", + "\n", + "Which includes the session_id. For this dataset, the session_id was encoded with the following pattern:\n", + "\n", + "`
++++++++++++`\n", + "\n", + "For example, `\"F3++SomExc++iSPN++OffState++none++WT++20160523154318\"` indicates:\n", + "- Figure 3 data\n", + "- Somatic excitability measurement \n", + "- Indirect pathway SPNs (iSPN)\n", + "- Off-state (no levodopa)\n", + "- No pharmacological treatment\n", + "- Wild-type genotype\n", + "- Recorded on May 23, 2016 at 15:43:18\n", + "\n", + "To fetch data only for a certain figure or experimental condition, we will define the following set of utilities that will allow us to filter only the NWB files we are interested in." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8c6e3a0", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Literal\n", + "\n", + "# Session ID Parsing Functions\n", + "# These functions decode the rich metadata encoded in DANDI file paths\n", + "\n", + "def get_session_id(asset_path: str) -> str:\n", + " \"\"\"\n", + " Extract session ID from DANDI asset path.\n", + " \n", + " DANDI encodes paths as:\n", + " sub-/sub-_ses-_[desc-]_.nwb\n", + " \n", + " Example path:\n", + " 'sub-SubjectRecordedAt20160523154318/sub-SubjectRecordedAt20160523154318_ses-F3++SomExc++iSPN++OffState++none++WT++20160523154318_icephys.nwb'\n", + " \n", + " The session_id contains experimental metadata separated by '++':\n", + " F3++SomExc++iSPN++OffState++none++WT++20160523154318\n", + " │ │ │ │ │ │ │\n", + " │ │ │ │ │ │ └─ Timestamp\n", + " │ │ │ │ │ └─ Genotype (WT=Wild Type) \n", + " │ │ │ │ └─ Pharmacology (none=no drugs)\n", + " │ │ │ └─ State (OffState=no stimulation)\n", + " │ │ └─ Cell Type (iSPN=indirect pathway spiny projection neuron)\n", + " │ └─ Measurement (SomExc=somatic excitability)\n", + " └─ Figure (F3=Figure 3)\n", + " \"\"\"\n", + " if not asset_path:\n", + " return \"\"\n", + " bottom_level_path = asset_path.split(\"/\")[1] # Remove top level subject\n", + " session_id_with_ses_prefix = bottom_level_path.split(\"_\")[1]\n", + " session_id = session_id_with_ses_prefix.split(\"-\")[1]\n", + "\n", + " return session_id\n", + "\n", + "# Metadata extraction functions - each extracts a specific experimental parameter\n", + "\n", + "def get_figure_number(session_id: str):\n", + " \"\"\"Extract which figure this data corresponds to (F1, F2, F3, etc.)\"\"\"\n", + " return session_id.split(\"++\")[0]\n", + "\n", + "def is_figure_number(session_id: str, figure_number: Literal[\"F1\", \"F2\", \"F3\", \"F4\", \"F5\", \"F6\", \"F7\", \"F8\", \"SF3\"]) -> bool:\n", + " \"\"\"Check if data belongs to a specific figure\"\"\"\n", + " return get_figure_number(session_id) == figure_number\n", + "\n", + "\n", + "def get_measurement(session_id: str) -> str:\n", + " \"\"\"\n", + " Extract measurement type:\n", + " - SomExc: Somatic excitability (patch clamp at cell body)\n", + " - DendExc: Dendritic excitability (patch clamp + 2-photon imaging)\n", + " - DendSpine: Dendritic spine density measurements\n", + " - BehavAIMs: Abnormal involuntary movement behavioral scoring\n", + " - StriAChFP: Striatal acetylcholine fluorescent protein imaging\n", + " \"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"++\")[1]\n", + "\n", + "def is_measurement(session_id: str, measurement: Literal[\"SomExc\", \"DendExc\", \"DendSpine\", \"DendConfSpine\", \"DendOEPSC\", \"StriAChFP\", \"BehavAIMs\", \"BehavRot\", \"BehavVideo\", \"AChFP\", \"AIMs\", \"ConfSpine\", \"SpineDens\", \"oEPSC\", \"video\"]) -> bool:\n", + " \"\"\"Filter data by measurement/experiment type\"\"\"\n", + " return get_measurement(session_id) == measurement\n", + "\n", + "def get_cell_type(session_id: str) -> str:\n", + " \"\"\"\n", + " Extract cell type:\n", + " - dSPN: Direct pathway spiny projection neurons\n", + " - iSPN: Indirect pathway spiny projection neurons \n", + " - pan: Pan-neuronal (both types)\n", + " \"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"++\")[2]\n", + "\n", + "def is_cell_type(session_id: str, cell_type: Literal[\"dSPN\", \"iSPN\", \"pan\"]) -> bool:\n", + " \"\"\"Filter data by cell type\"\"\"\n", + " return get_cell_type(session_id) == cell_type\n", + "\n", + "def get_state(session_id: str) -> str:\n", + " \"\"\"\n", + " Extract experimental state:\n", + " - CTRL: Control condition\n", + " - PD: Parkinson's disease model\n", + " - OffState: No levodopa treatment\n", + " - OnState: With levodopa treatment\n", + " \"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"++\")[3]\n", + "\n", + "def is_state(session_id: str, state: Literal[\"CTRL\", \"LesionedControl\", \"OFF\", \"ON\", \"OffState\", \"OnState\", \"PD\"]) -> bool:\n", + " \"\"\"Filter data by disease/treatment state\"\"\"\n", + " return get_state(session_id) == state\n", + "\n", + "def get_pharmacology(session_id: str) -> str:\n", + " \"\"\"\n", + " Extract pharmacological treatment:\n", + " - none: No drugs applied\n", + " - CNO: Clozapine N-oxide (DREADD activator)\n", + " - D1RaSch: D1 receptor agonist SCH23390\n", + " - M1RaOxoM: M1 receptor agonist Oxotremorine-M\n", + " \"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"++\")[4]\n", + "\n", + "def is_pharmacology(session_id: str, pharmacology: Literal[\"none\", \"WT\", \"DMSO\", \"CNO\", \"D1RaSch\", \"D2RaSul\", \"M1RaOxoM\", \"M1RaThp\", \"M1RaTri\"]) -> bool:\n", + " \"\"\"Filter data by pharmacological condition\"\"\"\n", + " return get_pharmacology(session_id) == pharmacology\n", + "\n", + "def get_genotype(session_id: str) -> str:\n", + " \"\"\"\n", + " Extract animal genotype:\n", + " - WT: Wild type\n", + " - M1RCRISPR: M1 receptor knockout\n", + " - iSPNM1RKO: iSPN-specific M1 receptor knockout\n", + " \"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"++\")[5]\n", + "\n", + "def is_genotype(session_id: str, genotype: Literal[\"CDGIKO\", \"M1RCRISPR\", \"WT\", \"iSPN\", \"iSPNM1RKO\"]) -> bool:\n", + " \"\"\"Filter data by genotype\"\"\"\n", + " return get_genotype(session_id) == genotype\n", + "\n", + "def get_timestamp(session_id: str) -> str:\n", + " \"\"\"Extract recording timestamp (YYYYMMDDHHMMSS format)\"\"\"\n", + " if not session_id:\n", + " return \"\"\n", + " return session_id.split(\"++\")[6]\n", + "\n", + "# Example: Parse metadata from a sample file\n", + "sample_asset = assets_list[2]\n", + "session_id = get_session_id(sample_asset.path)\n", + "figure_number = get_figure_number(session_id)\n", + "measurement = get_measurement(session_id)\n", + "cell_type = get_cell_type(session_id)\n", + "state = get_state(session_id)\n", + "pharmacology = get_pharmacology(session_id)\n", + "genotype = get_genotype(session_id)\n", + "timestamp = get_timestamp(session_id)\n", + "\n", + "print(\"Parsed metadata from session ID:\")\n", + "print(f\"Session ID: {session_id}\")\n", + "print(f\"Figure: {figure_number}\")\n", + "print(f\"Measurement: {measurement}\")\n", + "print(f\"Cell Type: {cell_type}\")\n", + "print(f\"State: {state}\")\n", + "print(f\"Pharmacology: {pharmacology}\")\n", + "print(f\"Genotype: {genotype}\")\n", + "print(f\"Timestamp: {timestamp}\")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "0f0ab6db", + "metadata": {}, + "source": [ + "## Current Clamp Data Access and Organization\n", + "\n", + "### NWB Data Structure for Current Clamp Experiments\n", + "\n", + "Current clamp data in NWB files is organized across several modules:\n", + "- **`acquisition`**: Raw voltage and current time series from individual recording sweeps\n", + "- **`intracellular_recordings`**: High-level table linking stimuli to responses \n", + "- **`stimuli`**: Current injection protocols\n", + "- **`responses`**: Voltage recordings organized by experimental condition\n", + "\n", + "Let's explore how to access and work with this hierarchical data organization using the filters we defined above to extract data from the somatic excitability experiments of figure 1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "afa1bf9d", + "metadata": {}, + "outputs": [], + "source": [ + "# Using the parsing functions to filter data\n", + "# Here we select somatic excitability experiments from Figure 1\n", + "\n", + "# Define selection criteria using lambda function\n", + "def is_figure_number(session_id: str, figure_number: Literal[\"F1\", \"F2\", \"F3\", \"F4\", \"F5\", \"F6\", \"F7\", \"F8\", \"SF3\"]) -> bool:\n", + " \"\"\"Check if data belongs to a specific figure\"\"\"\n", + " return get_figure_number(session_id) == figure_number\n", + "\n", + "\n", + "def is_measurement(session_id: str, measurement: Literal[\"SomExc\", \"DendExc\", \"DendSpine\", \"DendConfSpine\", \"DendOEPSC\", \"StriAChFP\", \"BehavAIMs\", \"BehavRot\", \"BehavVideo\", \"AChFP\", \"AIMs\", \"ConfSpine\", \"SpineDens\", \"oEPSC\", \"video\"]) -> bool:\n", + " \"\"\"Filter data by measurement/experiment type\"\"\"\n", + " return get_measurement(session_id) == measurement\n", + "\n", + "criteria = lambda asset: is_figure_number(get_session_id(asset.path), \"F1\") and is_measurement(get_session_id(asset.path), \"SomExc\")\n", + "\n", + "# Filter the assets list to get only matching files\n", + "available_assets = [asset for asset in assets_list if criteria(asset)]\n", + "\n", + "print(f\"Found {len(available_assets)} somatic excitability assets from Figure 1:\")\n", + "for i, asset in enumerate(available_assets[:3]): # Show first 3 files\n", + " session_id = get_session_id(asset.path)\n", + " print(f\" {i+1}. {asset.path}\")\n", + " print(f\" Session: {session_id}\")\n", + " print(f\" Cell type: {get_cell_type(session_id)}\")\n", + " print(f\" State: {get_state(session_id)}\")\n", + " print(f\" Genotype: {get_genotype(session_id)}\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "345187f6", + "metadata": {}, + "source": [ + "### Streaming NWB Files from DANDI\n", + "\n", + "Here we demonstrate **streaming access** to NWB files directly from\n", + "DANDI's cloud storage. This approach enables:\n", + "\n", + "- **No local downloads**: Access data directly over the internet without\n", + "storing files locally\n", + "- **Selective data access**: Read only the parts of the file you need\n", + "(lazy loading)\n", + "- **Memory efficiency**: Work with large datasets without loading\n", + "everything into memory\n", + "- **Global accessibility**: Access data from anywhere with an internet\n", + "connection\n", + "\n", + "This is particularly valuable for large neurophysiology datasets that may\n", + "be too big to download entirely." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acea359f", + "metadata": {}, + "outputs": [], + "source": [ + "import h5py\n", + "import remfile\n", + "from pynwb import NWBHDF5IO\n", + "\n", + "asset = available_assets[0]\n", + "s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n", + "file_system = remfile.File(s3_url)\n", + "file = h5py.File(file_system, mode=\"r\")\n", + "\n", + "io = NWBHDF5IO(file=file)\n", + "nwbfile = io.read()\n", + "\n", + "nwbfile" + ] + }, + { + "cell_type": "markdown", + "id": "ax6oed6hxzs", + "metadata": {}, + "source": [ + "### Exploring the Acquisition Module\n", + "\n", + "The `acquisition` module contains the raw data streams recorded during the experiment. For current clamp experiments, this typically includes multiple `CurrentClampSeries` objects, each representing one recording sweep with a specific current injection." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b200bc7", + "metadata": {}, + "outputs": [], + "source": [ + "# The Current Clamp Responses are stored in acquisition. Let's check out the available keys\n", + "nwbfile.acquisition.keys()" + ] + }, + { + "cell_type": "markdown", + "id": "edf70eb0", + "metadata": {}, + "source": [ + "### Accessing Individual Time Series\n", + "\n", + "Each `CurrentClampSeries` in the acquisition module represents one recording sweep. Let's access and plot two different sweeps to see how individual time series are stored and retrieved. Notice how each series has its own timestamps and can be accessed independently." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "975f51d8", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Create subplots side by side\n", + "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n", + "\n", + "# Plot CurrentClampSeries001 on the left\n", + "current_clamp_response_001 = nwbfile.acquisition[\"CurrentClampSeries001\"]\n", + "timestamps_001 = current_clamp_response_001.get_timestamps()\n", + "unit_001 = current_clamp_response_001.unit\n", + "\n", + "ax1.plot(timestamps_001, current_clamp_response_001.get_data_in_units())\n", + "ax1.set_xlabel(\"Time (s)\")\n", + "ax1.set_ylabel(f\"Voltage ({unit_001})\")\n", + "ax1.set_title(\"CurrentClampSeries001\")\n", + "\n", + "# Plot CurrentClampSeries002 on the right\n", + "current_clamp_response_002 = nwbfile.acquisition[\"CurrentClampSeries002\"]\n", + "timestamps_002 = current_clamp_response_002.get_timestamps()\n", + "unit_002 = current_clamp_response_002.unit\n", + "\n", + "ax2.plot(timestamps_002, current_clamp_response_002.get_data_in_units())\n", + "ax2.set_xlabel(\"Time (s)\")\n", + "ax2.set_ylabel(f\"Voltage ({unit_002})\")\n", + "ax2.set_title(\"CurrentClampSeries002\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "11d0bfbd", + "metadata": {}, + "source": [ + "Note that the timestamps of the units are different as they\n", + "are recorded at different times during the experiment. All the timestamps in NWB are aligned to the same time base (e.g., the start of the recording session). We will see how to align these timestamps in a same base for comparision further down" + ] + }, + { + "cell_type": "markdown", + "id": "6efb536b", + "metadata": {}, + "source": [ + "### Systematic Data Access via Intracellular Recordings Table\n", + "\n", + "While individual sweeps can be accessed from the `acquisition` module, NWB provides a higher-level interface through the `intracellular_recordings` table. This table systematically organizes all stimulus-response pairs, making it easier to:\n", + "\n", + "- Link current injection parameters to voltage responses\n", + "- Access metadata for each recording sweep\n", + "- Perform systematic analysis across all experimental conditions\n", + "- Build frequency-intensity (F-I) curves and other analyses\n", + "\n", + "The table approach is particularly valuable when dealing with many recording sweeps with different experimental parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23b6fa90", + "metadata": {}, + "outputs": [], + "source": [ + "nwbfile.intracellular_recordings.to_dataframe()" + ] + }, + { + "cell_type": "markdown", + "id": "a8d6a97c", + "metadata": {}, + "source": [ + "We see in the table all the recordings, including their start and end times, as well as the associated metadata such as the electrode location and crucially the input current values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebaab249", + "metadata": {}, + "outputs": [], + "source": [ + "currents = nwbfile.intracellular_recordings[\"stimulus_current_pA\"][:]" + ] + }, + { + "cell_type": "markdown", + "id": "0b4a47ab", + "metadata": {}, + "source": [ + "### Accessing Individual Sweep Data Through the Table\n", + "\n", + "The `intracellular_recordings` table provides references to the actual time series data. Each row contains:\n", + "- **Stimulus parameters**: Current amplitude, timing, waveform\n", + "- **Response references**: Links to the voltage recordings in the acquisition module\n", + "- **Metadata**: Electrode properties, recording conditions\n", + "\n", + "Let's extract one specific stimulus-response pair to see how this referencing system works." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba2c9241", + "metadata": {}, + "outputs": [], + "source": [ + "index = 20\n", + "current_in_pA = currents[index]\n", + "response_reference = nwbfile.intracellular_recordings[\"responses\"][\"response\"][index]\n", + "response = response_reference.timeseries\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76fc5235", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "number_of_samples = response.data.shape[0]\n", + "sampling_rate = response.rate\n", + "aligned_times = np.arange(number_of_samples) / sampling_rate\n", + "unit = response.unit\n", + "data_in_volts = response.get_data_in_units()\n", + "data_in_millivolts = data_in_volts * 1000 # Convert to millivolts\n", + "\n", + "plt.plot(aligned_times, data_in_millivolts)\n", + "plt.xlabel(\"Time (s) aligned to response start\")\n", + "\n", + "plt.ylabel(f\"Voltage (mV)\")" + ] + }, + { + "cell_type": "markdown", + "id": "4f892ed3", + "metadata": {}, + "source": [ + "### Systematic Access to All Sweeps\n", + "\n", + "The real power of the `intracellular_recordings` table becomes apparent when accessing multiple sweeps systematically. Rather than manually tracking individual `CurrentClampSeries` names, we can iterate through the table to access all stimulus-response pairs.\n", + "\n", + "Note the timestamp alignment: we create artificial timestamps using sampling rate and number of samples, effectively aligning all responses to stimulus onset (t=0). This is crucial for comparing responses across different current injection amplitudes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbf3759b", + "metadata": {}, + "outputs": [], + "source": [ + "response_references = nwbfile.intracellular_recordings[\"responses\"][\"response\"]\n", + "\n", + "# Create a colormap for better visualization\n", + "colors = plt.cm.autumn_r(np.linspace(0, 1, len(currents)))\n", + "\n", + "for index, (current_in_pA, response_reference) in enumerate(zip(currents, response_references)):\n", + " # Skip every second plot for clarity\n", + " if index % 2 == 0:\n", + " response = response_reference.timeseries\n", + " number_of_samples = response.data.shape[0]\n", + " sampling_rate = response.rate\n", + " data_in_volts = response.get_data_in_units()\n", + " data_in_millivolts = data_in_volts * 1000 # Convert to millivolts \n", + " aligned_times = np.arange(number_of_samples) / sampling_rate\n", + " plt.plot(aligned_times, data_in_millivolts, \n", + " label=f\"{current_in_pA} pA\", color=colors[index])\n", + "\n", + "plt.xlabel(\"Time (s) aligned to response start\")\n", + "plt.ylabel(\"Voltage (mV)\")\n", + "plt.title(\"Voltage Responses to Stimulus Currents\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "64affcff", + "metadata": {}, + "source": [ + "We have skipped every second response to compress the map and use a sequential color map with yellow for the smallest current and red for the largest current to showcase the variation in voltage responses more clearly." + ] + }, + { + "cell_type": "markdown", + "id": "77c04c2b", + "metadata": {}, + "source": [ + "## Dendritic Excitability Experiments - Line Scans \n", + "\n", + "### NWB Organization for Two-Photon Imaging Data\n", + "\n", + "Dendritic excitability experiments combine patch-clamp electrophysiology with two-photon microscopy. While the `acquisition` module also contains current clamp data (accessed the same way as above), we'll focus here on the optical physiology components:\n", + "\n", + "**Data stored in `acquisition` module:**\n", + "- **Source Images**: Stored as `Images` container with individual `GrayscaleImage` objects\n", + "- **Line Scan Raw Data**: Stored as `TimeSeries` objects with time × pixel dimensions\n", + "\n", + "**Data stored in `processing/ophys` module:**\n", + "- **ROI Responses**: Stored as `RoiResponseSeries` objects in a `Fluorescence` data interface\n", + "- **Plane Segmentation**: Stored as `PlaneSegmentation` tables containing pixel masks that define ROI locations\n", + "\n", + "These experiments typically include multiple trials for both **distal** and **proximal** dendritic locations, allowing comparison of calcium responses at different distances from the soma." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "189f446e", + "metadata": {}, + "outputs": [], + "source": [ + "criteria = lambda asset: is_figure_number(get_session_id(asset.path), \"F1\") and is_measurement(get_session_id(asset.path), \"DendExc\")\n", + "available_assets = [asset for asset in assets_list if criteria(asset)]" + ] + }, + { + "cell_type": "markdown", + "id": "0m90hkmy5lp", + "metadata": {}, + "source": [ + "### Filtering for Dendritic Excitability Data\n", + "\n", + "First, we filter the assets to find dendritic excitability experiments from Figure 1. These files contain the two-photon imaging data combined with electrophysiology measurements." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99ebe563", + "metadata": {}, + "outputs": [], + "source": [ + "import h5py\n", + "import remfile\n", + "from pynwb import NWBHDF5IO\n", + "\n", + "# Get one of the assets\n", + "asset = available_assets[0]\n", + "s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n", + "file_system = remfile.File(s3_url)\n", + "file = h5py.File(file_system, mode=\"r\")\n", + "io = NWBHDF5IO(file=file, load_namespaces=True)\n", + "nwbfile = io.read()\n", + "nwbfile" + ] + }, + { + "cell_type": "markdown", + "id": "qbpemtkhu7", + "metadata": {}, + "source": [ + "### Streaming a Dendritic Excitability NWB File\n", + "\n", + "We'll stream one of the dendritic excitability files to explore how two-photon imaging data is organized in NWB. Notice how the same streaming approach works for both electrophysiology and imaging data." + ] + }, + { + "cell_type": "markdown", + "id": "931d89a2", + "metadata": {}, + "source": [ + "### Accessing ROI Response Data from the Ophys Module\n", + "\n", + "ROI responses are stored in the `processing/ophys/Fluorescence` data interface as `RoiResponseSeries` objects. Each series represents fluorescence changes over time for a specific region of interest (ROI).\n", + "\n", + "Let's compare distal vs proximal dendritic responses by accessing multiple `RoiResponseSeries` objects. Notice the naming convention that allows us to programmatically identify different experimental conditions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3706fdd9", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from scipy.signal import butter, filtfilt\n", + "\n", + "roi_response_names = [\n", + " \"RoiResponseSeriesFluo4DistalDendrite1Trial001\",\n", + " \"RoiResponseSeriesFluo4DistalDendrite1Trial002\",\n", + " \"RoiResponseSeriesFluo4DistalDendrite1Trial003\",\n", + " \"RoiResponseSeriesFluo4ProximalDendrite1Trial001\",\n", + " \"RoiResponseSeriesFluo4ProximalDendrite1Trial002\",\n", + " \"RoiResponseSeriesFluo4ProximalDendrite1Trial003\",\n", + "]\n", + "\n", + "ophys_module = nwbfile.processing['ophys']\n", + "fluoresence_module = ophys_module.data_interfaces[\"Fluorescence\"]\n", + "\n", + "\n", + "roi_response_proximal = [fluoresence_module[name] for name in roi_response_names if \"Proximal\" in name]\n", + "roi_response_distal = [fluoresence_module[name] for name in roi_response_names if \"Distal\" in name]\n", + "\n", + "\n", + "color_distal, color_proximal = \"tab:blue\", \"tab:orange\"\n", + "\n", + "plt.figure(figsize=(10, 6))\n", + "\n", + "# Distal (filtered, one color; vary linestyle/alpha per trial)\n", + "for i, roi_response in enumerate(roi_response_distal, start=1):\n", + " num_samples = roi_response.data.shape[0]\n", + " sampling_rate = roi_response.rate\n", + " aligned_timestamps = np.arange(num_samples) / sampling_rate\n", + " b, a = butter(4, 10/(0.5*sampling_rate), btype=\"low\") # 10 Hz cutoff\n", + " filtered_response = filtfilt(b, a, roi_response.data[:])\n", + " plt.plot(\n", + " aligned_timestamps, filtered_response,\n", + " color=color_distal,\n", + " label=f\"Distal Trial{roi_response.name[-4:]}\"\n", + " )\n", + "\n", + "# Proximal (filtered, one color; different style per trial)\n", + "for i, roi_response in enumerate(roi_response_proximal, start=1):\n", + " num_samples = roi_response.data.shape[0]\n", + " sampling_rate = roi_response.rate\n", + " aligned_timestamps = np.arange(num_samples) / sampling_rate\n", + " b, a = butter(4, 10/(0.5*sampling_rate), btype=\"low\") # 10 Hz cutoff\n", + " filtered_response = filtfilt(b, a, roi_response.data[:])\n", + " plt.plot(\n", + " aligned_timestamps, filtered_response,\n", + " color=color_proximal,\n", + " label=f\"Proximal Trial{roi_response.name[-4:]}\"\n", + " )\n", + "\n", + "plt.xlabel(\"Time (s)\")\n", + "plt.ylabel(\"Fluorescence (a.u.)\")\n", + "plt.title(\"Distal vs Proximal ROI responses (low-pass 10 Hz)\")\n", + "plt.legend()\n", + "plt.tight_layout()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "2efb4f3d", + "metadata": {}, + "source": [ + "### Line Scans and Fluoresence " + ] + }, + { + "cell_type": "markdown", + "id": "37e96a00", + "metadata": {}, + "source": [ + "### Comprehensive Line Scan Visualization: Linking Images, Raw Data, and ROI Responses\n", + "\n", + "This section demonstrates how to access and visualize the complete line scan data workflow in NWB:\n", + "\n", + "1. **Source Images** (`GrayscaleImage` objects): Anatomical reference showing where line scans were performed\n", + "2. **Line Scan Raw Data** (`TimeSeries` objects): Raw fluorescence measurements over time and space \n", + "3. **ROI Responses** (`RoiResponseSeries` objects): Processed fluorescence time series extracted from specific regions and are linked to them directly.\n", + "\n", + "This demonstrates how NWB maintains the complete data provenance chain from raw measurements to processed analysis results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ab71305", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.colors as mcolors\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from mpl_toolkits.axes_grid1.inset_locator import inset_axes\n", + "from scipy.signal import butter, filtfilt\n", + "\n", + "line_scan_raw_data_name = \"TimeSeriesLineScanRawFluo4DistalDendrite1Trial001\"\n", + "source_image_name = \"ImageAlexa568DistalDendrite1Trial001\"\n", + "roi_response_series_name = \"RoiResponseSeriesFluo4DistalDendrite1Trial001\"\n", + "\n", + "line_scan_data = nwbfile.acquisition[line_scan_raw_data_name]\n", + "source_images_container = nwbfile.acquisition['ImageLineScanSource']\n", + "source_image = source_images_container[source_image_name]\n", + "fluoresence_module = ophys_module.data_interfaces[\"Fluorescence\"]\n", + "roi_response = fluoresence_module[roi_response_series_name]\n", + "\n", + "# Get line scan coordinates from the ROI response series\n", + "pixel_mask = roi_response.rois[0][\"pixel_mask\"].iloc[0]\n", + " \n", + "print(f\"✓ Extracted line scan coordinates:\")\n", + "print(f\" X range: {pixel_mask['x'].min()} to {pixel_mask['x'].max()}\")\n", + "print(f\" Y range: {pixel_mask['y'].min()} to {pixel_mask['y'].max()}\")\n", + "print(f\" Number of pixels: {len(pixel_mask['x'])}\")\n", + "\n", + "# Define colors for different dyes\n", + "alexa568_color = '#FF4500' # Red-orange color for Alexa Fluor 568 (structural dye)\n", + "fluo4_color = '#32CD32' # Lime green color for Fluo-4 (looks good on white background)\n", + "\n", + "# Create custom colormaps\n", + "alexa568_cmap = mcolors.LinearSegmentedColormap.from_list(\n", + " \"alexa568\", [\"black\", alexa568_color], N=256\n", + ")\n", + "fluo4_cmap = mcolors.LinearSegmentedColormap.from_list(\n", + " \"fluo4\", [\"white\", fluo4_color], N=256\n", + ")\n", + "\n", + "# Get raw line scan data - no filtering\n", + "line_scan_raw_data = line_scan_data.data[:]\n", + "num_samples = line_scan_raw_data.shape[0]\n", + "num_pixels = line_scan_raw_data.shape[1]\n", + "\n", + "print(f\"Line scan data shape: {line_scan_raw_data.shape}\")\n", + "\n", + "# Get ROI response data and create filtered version\n", + "roi_response_data = roi_response.data[:]\n", + "roi_sampling_rate = roi_response.rate\n", + "print(f\"ROI response shape: {roi_response_data.shape}\")\n", + "\n", + "# Simple first-order Butterworth filter for ROI response only\n", + "cutoff_freq = 10 # Hz - appropriate for calcium signals\n", + "b_roi, a_roi = butter(1, cutoff_freq/(0.5*roi_sampling_rate), btype=\"low\") # First order\n", + "roi_response_filtered = filtfilt(b_roi, a_roi, roi_response_data)\n", + "\n", + "# Create figure with source image on left, raw data and fluorescence stacked on right\n", + "fig = plt.figure(figsize=(16, 10))\n", + "gs = fig.add_gridspec(nrows=2, ncols=2, hspace=0.3, wspace=0.25)\n", + "\n", + "# Left: Source image with line scan overlay (Alexa 568 - structural) - no inset\n", + "ax_src = fig.add_subplot(gs[:, 0])\n", + "ax_src.imshow(source_image, cmap=alexa568_cmap, aspect=\"equal\", origin=\"upper\")\n", + "ax_src.set_title(\"Source Image (Alexa 568) with Line Scan Overlay\")\n", + "ax_src.set_xlabel(\"X position (pixels)\")\n", + "ax_src.set_ylabel(\"Y position (pixels)\")\n", + "\n", + "ax_src.plot(pixel_mask[\"x\"], pixel_mask[\"y\"], \"yellow\", linewidth=3, alpha=0.9, label=\"Line scan\")\n", + "ax_src.legend(loc=\"best\")\n", + "\n", + "# Top right: Raw line scan data with Fluo-4 colormap\n", + "ax_raw = fig.add_subplot(gs[0, 1])\n", + "im_raw = ax_raw.imshow(\n", + " line_scan_raw_data.T,\n", + " aspect=\"auto\",\n", + " cmap=fluo4_cmap,\n", + " origin=\"lower\",\n", + " extent=[0, num_samples, 0, num_pixels],\n", + ")\n", + "ax_raw.set_title(\"Line Scan Raw Data (Fluo-4)\")\n", + "ax_raw.set_xlabel(\"Line Scans (samples)\")\n", + "ax_raw.set_ylabel(\"Position along line (pixels)\")\n", + "\n", + "# Ensure same x-limits for raw and ROI plots\n", + "ax_raw.set_xlim(0, num_samples)\n", + "\n", + "# Colorbar for raw data - positioned well inside the plot\n", + "cax_raw = inset_axes(\n", + " ax_raw,\n", + " width=\"3%\",\n", + " height=\"50%\",\n", + " loc=\"upper right\",\n", + " bbox_to_anchor=(-0.15, -0.15, 1, 1),\n", + " bbox_transform=ax_raw.transAxes,\n", + " borderpad=0,\n", + ")\n", + "fig.colorbar(im_raw, cax=cax_raw, label=\"Ca²⁺ fluorescence\")\n", + "\n", + "# Bottom right: Raw and filtered ROI response time series\n", + "ax_roi = fig.add_subplot(gs[1, 1])\n", + "ax_roi.plot(roi_response_data, color=fluo4_color, linewidth=1, alpha=0.6, label=\"Raw\")\n", + "ax_roi.plot(roi_response_filtered, color='darkgreen', linewidth=2, label=f\"Filtered ({cutoff_freq} Hz, 1st order)\")\n", + "ax_roi.set_title(\"ROI Response Time Series (Fluo-4)\")\n", + "ax_roi.set_xlabel(\"Line Scans (samples)\")\n", + "ax_roi.set_ylabel(\"Ca²⁺ fluorescence (a.u.)\")\n", + "ax_roi.legend()\n", + "\n", + "# Match x-limits to the raw line-scan plot\n", + "ax_roi.set_xlim(0, num_samples)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "n18sxr480lk", + "metadata": {}, + "source": [ + "## Optogenetic Experiments - oEPSC Analysis\n", + "\n", + "### NWB Organization for Optogenetic Data\n", + "\n", + "Optogenetic experiments combine voltage clamp electrophysiology with optical stimulation to study synaptic responses. These experiments measure optogenetically-evoked postsynaptic currents (oEPSCs) in striatal neurons following ChR2-mediated activation of cortical terminals.\n", + "\n", + "**Data stored in `acquisition` module:**\n", + "- **VoltageClampSeries**: Raw current recordings from voltage clamp experiments\n", + "- **OptogeneticSeries**: Optical stimulation parameters and timing (stored in `stimulus` module)\n", + "\n", + "**Data stored in `intervals` module:**\n", + "- **optogenetic_epochs_table**: Detailed timing information for stimulation and detection windows\n", + "- **Stage definitions**: pre_stimulation, stimulation, detection, post_detection, inter_sweep_interval" + ] + }, + { + "cell_type": "markdown", + "id": "6jy7q6xav9w", + "metadata": {}, + "source": [ + "### Filtering for Optogenetic Data from Figure 2\n", + "\n", + "We use the same filtering approach to identify optogenetic experiments (oEPSC measurements) from Figure 2. These files contain voltage clamp recordings with optical stimulation protocols." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ln4vhly9mc", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter for optogenetic experiments from Figure 2\n", + "criteria_oepsc = lambda asset: is_figure_number(get_session_id(asset.path), \"F2\") and is_measurement(get_session_id(asset.path), \"oEPSC\")\n", + "\n", + "available_oepsc_assets = [asset for asset in assets_list if criteria_oepsc(asset)]\n", + "\n", + "print(f\"Found {len(available_oepsc_assets)} oEPSC assets from Figure 2:\")\n", + "for i, asset in enumerate(available_oepsc_assets[:3]): # Show first 3 files\n", + " session_id = get_session_id(asset.path)\n", + " print(f\" {i+1}. {asset.path}\")\n", + " print(f\" Session: {session_id}\")\n", + " print(f\" Cell type: {get_cell_type(session_id)}\")\n", + " print(f\" State: {get_state(session_id)}\")\n", + " print(f\" Genotype: {get_genotype(session_id)}\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "ot45abgxlp", + "metadata": {}, + "source": [ + "### Streaming an NWB file with Optogenetic stimulation\n", + "\n", + "Let's stream one optogenetic file to explore the voltage clamp data organization and optogenetic stimulation protocols." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9phva2ks1qn", + "metadata": {}, + "outputs": [], + "source": [ + "import h5py\n", + "import remfile\n", + "from pynwb import NWBHDF5IO\n", + "\n", + "asset = available_oepsc_assets[0]\n", + "s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n", + "file_system = remfile.File(s3_url)\n", + "file = h5py.File(file_system, mode=\"r\")\n", + "\n", + "io = NWBHDF5IO(file=file)\n", + "nwbfile = io.read()\n", + "nwbfile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b6428eb", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Available acquisition keys:\")\n", + "acquisition_keys = list(nwbfile.acquisition.keys())\n", + "print(acquisition_keys)\n", + "\n", + "print(f\"\\nFound {len([k for k in acquisition_keys if 'VoltageClamp' in k])} VoltageClampSeries in acquisition module\")\n", + "print(\"These contain the raw current recordings from voltage clamp experiments\")" + ] + }, + { + "cell_type": "markdown", + "id": "dkik7h32jo4", + "metadata": {}, + "source": [ + "### Optogenetic Epochs Table\n", + "\n", + "The key to understanding optogenetic experiments is the `optogenetic_epochs_table` stored in the `intervals` module. This table provides precise timing information for each experimental phase within each trial, including stimulation and detection windows.\n", + "\n", + "**Column explanations:**\n", + "- **start_time/stop_time**: Precise timing boundaries for each epoch (seconds)\n", + "- **stimulation_on**: Boolean indicating if optical stimulation is active\n", + "- **pulse_length_in_ms**: Duration of each optical pulse (when stimulation_on=True)\n", + "- **period_in_ms**: Inter-pulse interval for pulse trains\n", + "- **number_pulses_per_pulse_train**: Number of pulses in train (1 for single pulses)\n", + "- **number_trains**: Number of pulse trains\n", + "- **intertrain_interval_in_ms**: Time between trains\n", + "- **power_in_mW**: Optical power delivered\n", + "- **optical_fiber_locations**: Description of fiber placement\n", + "- **stage_name**: Experimental phase (pre_stimulation, stimulation, detection, post_detection, inter_sweep_interval)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9xodvm3bbp", + "metadata": {}, + "outputs": [], + "source": [ + "# Access the optogenetic epochs table\n", + "optogenetics_table_df = nwbfile.intervals[\"optogenetic_epochs_table\"].to_dataframe()\n", + "optogenetics_table_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "bvjhutt0lju", + "metadata": {}, + "source": [ + "### Visualizing Voltage Clamp Responses with Stimulation Windows\n", + "\n", + "Let's plot two voltage clamp sweeps side-by-side, showing both the stimulation and detection windows that are crucial for oEPSC analysis. The optogenetic epochs table provides precise timing information for each experimental phase." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ssnhxwtqkw9", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Get optogenetic timing information\n", + "optogenetics_table_df = nwbfile.intervals[\"optogenetic_epochs_table\"].to_dataframe()\n", + "stimulation_entries_df = optogenetics_table_df[optogenetics_table_df[\"stimulation_on\"] == True]\n", + "detection_entries_df = optogenetics_table_df[optogenetics_table_df[\"stage_name\"] == \"detection\"]\n", + "\n", + "# Create subplots side by side\n", + "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n", + "\n", + "for trial_index, (trial_name, ax, title) in enumerate([(1, ax1, \"VoltageClampSeriesSweep001\"), (7, ax2, \"VoltageClampSeriesSweep007\")]):\n", + " # Get voltage clamp response\n", + " series_name = f\"VoltageClampSeriesSweep{trial_index + 1:03d}\"\n", + " voltage_clamp_response = nwbfile.acquisition[series_name]\n", + " timestamps_in_seconds = voltage_clamp_response.get_timestamps()\n", + " data_in_amperes = voltage_clamp_response.get_data_in_units()\n", + " data_in_pico_amperes = data_in_amperes * 1e12\n", + " \n", + " # Get timing info for this trial\n", + " stimulation_info = stimulation_entries_df.iloc[trial_index]\n", + " detection_info = detection_entries_df.iloc[trial_index]\n", + " \n", + " stimulation_start_ms = stimulation_info[\"start_time\"] * 1000\n", + " stimulation_stop_ms = stimulation_info[\"stop_time\"] * 1000\n", + " detection_start_ms = detection_info[\"start_time\"] * 1000\n", + " detection_stop_ms = detection_info[\"stop_time\"] * 1000\n", + " \n", + " # Show data up to 50ms after detection window ends\n", + " timestamps_in_milliseconds = timestamps_in_seconds * 1000\n", + " starting_time = timestamps_in_milliseconds[0]\n", + " end_time = detection_stop_ms + 50\n", + " mask = (timestamps_in_milliseconds >= starting_time) & (timestamps_in_milliseconds <= end_time)\n", + " timestamps_filtered = timestamps_in_milliseconds[mask]\n", + " data_filtered = data_in_pico_amperes[mask]\n", + " \n", + " # Plot the data\n", + " ax.plot(timestamps_filtered, data_filtered, 'b-', linewidth=1)\n", + " ax.axvspan(stimulation_start_ms, stimulation_stop_ms, color=\"cyan\", alpha=0.8, label=\"Stimulation\")\n", + " ax.axvspan(detection_start_ms, detection_stop_ms, color=\"gray\", alpha=0.15, label=\"Detection\")\n", + " \n", + " ax.set_xlabel(\"Time (ms)\")\n", + " ax.set_ylabel(\"Current (pA)\")\n", + " ax.set_title(title)\n", + " \n", + " if trial_index == 0: # Add legend only to first subplot\n", + " ax.legend()\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "print(\"Stimulation and detection windows:\")\n", + "print(f\"Trial 1 - Stimulation: {stimulation_entries_df.iloc[0]['start_time']*1000:.1f}-{stimulation_entries_df.iloc[0]['stop_time']*1000:.1f} ms\")\n", + "print(f\"Trial 1 - Detection: {detection_entries_df.iloc[0]['start_time']*1000:.1f}-{detection_entries_df.iloc[0]['stop_time']*1000:.1f} ms\")" + ] + }, + { + "cell_type": "markdown", + "id": "kbaaafpi5", + "metadata": {}, + "source": [ + "### Event Detection and Analysis\n", + "\n", + "#### MAD-Based Noise Estimation for Robust Event Detection\n", + "\n", + "For accurate oEPSC event detection, we use **Median Absolute Deviation (MAD)** instead of standard deviation to estimate baseline noise. This approach is more robust to outliers (including the events themselves) and provides better threshold estimates.\n", + "\n", + "**Key parameters:**\n", + "- **Detection window shift**: 100ms after original detection start (to avoid stimulation artifacts)\n", + "- **Event threshold**: ±5 × MAD-based standard deviation from median\n", + "- **Event merging**: Combine events within 1ms (handles multi-threshold crossings from single events)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "s5t8nmaemd", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Control parameters for event detection\n", + "detection_window_shift_ms = 100 # Shift detection window start by this many ms\n", + "event_merge_distance_ms = 1.0 # Merge events within this distance\n", + "\n", + "def merge_nearby_events(event_times, event_amplitudes, merge_distance_ms):\n", + " \"\"\"Merge events within merge_distance_ms, keeping maximum amplitude\"\"\"\n", + " if len(event_times) == 0:\n", + " return event_times, event_amplitudes\n", + " \n", + " times = np.array(event_times)\n", + " amplitudes = np.array(event_amplitudes)\n", + " sorted_indices = np.argsort(times)\n", + " times = times[sorted_indices]\n", + " amplitudes = amplitudes[sorted_indices]\n", + " \n", + " merged_times = []\n", + " merged_amplitudes = []\n", + " \n", + " i = 0\n", + " while i < len(times):\n", + " current_time = times[i]\n", + " current_amp = amplitudes[i]\n", + " \n", + " # Find all events within merge_distance_ms\n", + " j = i + 1\n", + " max_amp = current_amp\n", + " max_amp_time = current_time\n", + " \n", + " while j < len(times) and (times[j] - current_time) <= merge_distance_ms:\n", + " if amplitudes[j] > max_amp:\n", + " max_amp = amplitudes[j]\n", + " max_amp_time = times[j]\n", + " j += 1\n", + " \n", + " merged_times.append(max_amp_time)\n", + " merged_amplitudes.append(max_amp)\n", + " i = j\n", + " \n", + " return merged_times, merged_amplitudes\n", + "\n", + "# Demonstrate event detection on one sweep\n", + "trial_index = 0\n", + "series_name = f\"VoltageClampSeriesSweep{trial_index + 1:03d}\"\n", + "voltage_clamp_response = nwbfile.acquisition[series_name]\n", + "timestamps_in_seconds = voltage_clamp_response.get_timestamps()\n", + "data_in_amperes = voltage_clamp_response.get_data_in_units()\n", + "data_in_pico_amperes = data_in_amperes * 1e12\n", + "\n", + "# Get timing information\n", + "stimulation_info = stimulation_entries_df.iloc[trial_index]\n", + "detection_info = detection_entries_df.iloc[trial_index]\n", + "\n", + "detection_start_ms_original = detection_info[\"start_time\"] * 1000\n", + "detection_stop_ms = detection_info[\"stop_time\"] * 1000\n", + "detection_start_ms_shifted = detection_start_ms_original + detection_window_shift_ms\n", + "\n", + "# Filter data to shifted detection window\n", + "timestamps_in_milliseconds = timestamps_in_seconds * 1000\n", + "detection_mask = (timestamps_in_milliseconds >= detection_start_ms_shifted) & (timestamps_in_milliseconds <= detection_stop_ms)\n", + "detection_data = data_in_pico_amperes[detection_mask]\n", + "detection_timestamps = timestamps_in_milliseconds[detection_mask]\n", + "\n", + "if len(detection_data) > 0:\n", + " # Calculate MAD-based standard deviation\n", + " noise_median = np.median(detection_data)\n", + " mad = np.median(np.abs(detection_data - noise_median))\n", + " mad_std = mad * 1.4826 # MAD to standard deviation conversion\n", + " \n", + " event_threshold_positive = noise_median + 5 * mad_std\n", + " event_threshold_negative = noise_median - 5 * mad_std\n", + " \n", + " # Find events\n", + " positive_event_indices = np.where(detection_data > event_threshold_positive)[0]\n", + " negative_event_indices = np.where(detection_data < event_threshold_negative)[0]\n", + " \n", + " # Calculate amplitudes and merge nearby events\n", + " positive_event_times_raw = detection_timestamps[positive_event_indices] if len(positive_event_indices) > 0 else []\n", + " negative_event_times_raw = detection_timestamps[negative_event_indices] if len(negative_event_indices) > 0 else []\n", + " \n", + " positive_event_amplitudes_raw = detection_data[positive_event_indices] - noise_median if len(positive_event_indices) > 0 else []\n", + " negative_event_amplitudes_raw = noise_median - detection_data[negative_event_indices] if len(negative_event_indices) > 0 else []\n", + " \n", + " positive_event_times_merged, positive_event_amplitudes_merged = merge_nearby_events(\n", + " positive_event_times_raw, positive_event_amplitudes_raw, event_merge_distance_ms)\n", + " negative_event_times_merged, negative_event_amplitudes_merged = merge_nearby_events(\n", + " negative_event_times_raw, negative_event_amplitudes_raw, event_merge_distance_ms)\n", + " \n", + " print(f\"Event Detection Results for {series_name}:\")\n", + " print(f\" Noise median: {noise_median:.2f} pA\")\n", + " print(f\" MAD-based std: {mad_std:.2f} pA\")\n", + " print(f\" Positive threshold: {event_threshold_positive:.2f} pA\")\n", + " print(f\" Negative threshold: {event_threshold_negative:.2f} pA\")\n", + " print(f\" Raw events: +{len(positive_event_amplitudes_raw)}, -{len(negative_event_amplitudes_raw)}\")\n", + " print(f\" Merged events: +{len(positive_event_amplitudes_merged)}, -{len(negative_event_amplitudes_merged)}\")\n", + " \n", + " if len(positive_event_amplitudes_merged) > 0:\n", + " print(f\" Positive event amplitudes: {[f'{amp:.1f}' for amp in positive_event_amplitudes_merged]} pA\")\n", + " if len(negative_event_amplitudes_merged) > 0:\n", + " print(f\" Negative event amplitudes: {[f'{amp:.1f}' for amp in negative_event_amplitudes_merged]} pA\")\n", + " \n", + "else:\n", + " print(f\"No data in detection window for {series_name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "5l1d6tx6nnt", + "metadata": {}, + "source": [ + "### Comprehensive Event Analysis Grid\n", + "\n", + "Finally, let's analyze all voltage clamp sweeps in this session systematically, showing event detection across all trials in a grid format. This provides a complete overview of optogenetic responses throughout the experimental session." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "igsch3uqx1j", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Get all acquisition keys for voltage clamp series\n", + "acquisition_keys = [key for key in nwbfile.acquisition.keys() if key.startswith(\"VoltageClampSeries\")]\n", + "num_sweeps = len(acquisition_keys)\n", + "\n", + "# Calculate grid dimensions\n", + "cols = 3\n", + "rows = (num_sweeps + cols - 1) // cols\n", + "\n", + "# Create subplots grid\n", + "fig, axes = plt.subplots(rows, cols, figsize=(15, 5*rows))\n", + "axes = axes.flatten() if num_sweeps > 1 else [axes]\n", + "\n", + "# Initialize summary statistics\n", + "all_positive_amplitudes = []\n", + "all_negative_amplitudes = []\n", + "total_events = 0\n", + "\n", + "# Process each sweep\n", + "for i, sweep_key in enumerate(acquisition_keys):\n", + " trial_number = int(sweep_key.split('Sweep')[-1])\n", + " \n", + " # Get voltage clamp response\n", + " voltage_clamp_response = nwbfile.acquisition[sweep_key]\n", + " timestamps_in_seconds = voltage_clamp_response.get_timestamps()\n", + " data_in_amperes = voltage_clamp_response.get_data_in_units()\n", + " data_in_pico_amperes = data_in_amperes * 1e12\n", + " \n", + " # Get timing information for this trial\n", + " if trial_number <= len(stimulation_entries_df) and trial_number <= len(detection_entries_df):\n", + " stimulation_info = stimulation_entries_df.iloc[trial_number - 1]\n", + " detection_info = detection_entries_df.iloc[trial_number - 1]\n", + " \n", + " stimulation_start_ms = stimulation_info[\"start_time\"] * 1000\n", + " stimulation_stop_ms = stimulation_info[\"stop_time\"] * 1000\n", + " detection_start_ms_original = detection_info[\"start_time\"] * 1000\n", + " detection_stop_ms = detection_info[\"stop_time\"] * 1000\n", + " detection_start_ms_shifted = detection_start_ms_original + detection_window_shift_ms\n", + " \n", + " # Show data up to 50ms after detection window ends\n", + " timestamps_in_milliseconds = timestamps_in_seconds * 1000\n", + " starting_time = timestamps_in_milliseconds[0]\n", + " end_time = detection_stop_ms + 50\n", + " mask = (timestamps_in_milliseconds >= starting_time) & (timestamps_in_milliseconds <= end_time)\n", + " timestamps_filtered = timestamps_in_milliseconds[mask]\n", + " data_filtered = data_in_pico_amperes[mask]\n", + " \n", + " # Calculate noise statistics and detect events\n", + " detection_mask = (timestamps_filtered >= detection_start_ms_shifted) & (timestamps_filtered <= detection_stop_ms)\n", + " detection_data = data_filtered[detection_mask]\n", + " detection_timestamps = timestamps_filtered[detection_mask]\n", + " \n", + " if len(detection_data) > 0:\n", + " # MAD-based noise estimation\n", + " noise_median = np.median(detection_data)\n", + " mad = np.median(np.abs(detection_data - noise_median))\n", + " mad_std = mad * 1.4826\n", + " \n", + " event_threshold_positive = noise_median + 5 * mad_std\n", + " event_threshold_negative = noise_median - 5 * mad_std\n", + " \n", + " # Find and merge events\n", + " positive_event_indices = np.where(detection_data > event_threshold_positive)[0]\n", + " negative_event_indices = np.where(detection_data < event_threshold_negative)[0]\n", + " \n", + " positive_event_times_raw = detection_timestamps[positive_event_indices] if len(positive_event_indices) > 0 else []\n", + " negative_event_times_raw = detection_timestamps[negative_event_indices] if len(negative_event_indices) > 0 else []\n", + " \n", + " positive_event_amplitudes_raw = detection_data[positive_event_indices] - noise_median if len(positive_event_indices) > 0 else []\n", + " negative_event_amplitudes_raw = noise_median - detection_data[negative_event_indices] if len(negative_event_indices) > 0 else []\n", + " \n", + " # Merge nearby events\n", + " positive_event_times_merged, positive_event_amplitudes_merged = merge_nearby_events(\n", + " positive_event_times_raw, positive_event_amplitudes_raw, event_merge_distance_ms)\n", + " negative_event_times_merged, negative_event_amplitudes_merged = merge_nearby_events(\n", + " negative_event_times_raw, negative_event_amplitudes_raw, event_merge_distance_ms)\n", + " \n", + " # Collect amplitudes for summary\n", + " all_positive_amplitudes.extend(positive_event_amplitudes_merged)\n", + " all_negative_amplitudes.extend(negative_event_amplitudes_merged)\n", + " \n", + " sweep_events = len(positive_event_amplitudes_merged) + len(negative_event_amplitudes_merged)\n", + " total_events += sweep_events\n", + " \n", + " # Plot\n", + " ax = axes[i]\n", + " ax.plot(timestamps_filtered, data_filtered, 'b-', linewidth=1)\n", + " ax.axvspan(stimulation_start_ms, stimulation_stop_ms, color=\"cyan\", alpha=0.8, label=\"Stimulation\")\n", + " ax.axvspan(detection_start_ms_original, detection_stop_ms, color=\"gray\", alpha=0.05, label=\"Original Detection\")\n", + " ax.axvspan(detection_start_ms_shifted, detection_stop_ms, color=\"lightblue\", alpha=0.15, label=\"Shifted Detection\")\n", + " ax.axhline(y=noise_median, color='black', linestyle='-', alpha=0.5, linewidth=1)\n", + " ax.axhline(y=event_threshold_positive, color='red', linestyle='--', alpha=0.7, linewidth=1)\n", + " ax.axhline(y=event_threshold_negative, color='red', linestyle='--', alpha=0.7, linewidth=1)\n", + " \n", + " # Get plot limits for event markers\n", + " y_min, y_max = ax.get_ylim()\n", + " \n", + " # Add event markers\n", + " for event_time in positive_event_times_merged:\n", + " ax.plot([event_time, event_time], [event_threshold_positive, y_max], \n", + " color='red', alpha=0.8, linewidth=2)\n", + " \n", + " for event_time in negative_event_times_merged:\n", + " ax.plot([event_time, event_time], [y_min, event_threshold_negative], \n", + " color='red', alpha=0.8, linewidth=2)\n", + " \n", + " ax.set_xlabel(\"Time (ms)\")\n", + " ax.set_ylabel(\"Current (pA)\")\n", + " ax.set_title(f\"{sweep_key} (Events: {sweep_events})\")\n", + " \n", + " # Add legend only to first subplot\n", + " if i == 0:\n", + " ax.legend(loc='upper right', fontsize=8)\n", + "\n", + "# Hide empty subplots\n", + "for i in range(num_sweeps, len(axes)):\n", + " axes[i].set_visible(False)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Print comprehensive summary\n", + "print(f\"\\n=== COMPREHENSIVE oEPSC ANALYSIS SUMMARY ===\")\n", + "print(f\"Session: {get_session_id(asset.path)}\")\n", + "print(f\"Total sweeps analyzed: {num_sweeps}\")\n", + "print(f\"Total events detected: {total_events}\")\n", + "print(f\"Positive events: {len(all_positive_amplitudes)}\")\n", + "print(f\"Negative events: {len(all_negative_amplitudes)}\")\n", + "\n", + "if len(all_positive_amplitudes) > 0:\n", + " print(f\"\\nPositive event statistics:\")\n", + " print(f\" Mean amplitude: {np.mean(all_positive_amplitudes):.2f} ± {np.std(all_positive_amplitudes):.2f} pA\")\n", + " print(f\" Median amplitude: {np.median(all_positive_amplitudes):.2f} pA\")\n", + " print(f\" Range: {np.min(all_positive_amplitudes):.1f} to {np.max(all_positive_amplitudes):.1f} pA\")\n", + "\n", + "if len(all_negative_amplitudes) > 0:\n", + " print(f\"\\nNegative event statistics:\")\n", + " print(f\" Mean amplitude: {np.mean(all_negative_amplitudes):.2f} ± {np.std(all_negative_amplitudes):.2f} pA\")\n", + " print(f\" Median amplitude: {np.median(all_negative_amplitudes):.2f} pA\")\n", + " print(f\" Range: {np.min(all_negative_amplitudes):.1f} to {np.max(all_negative_amplitudes):.1f} pA\")\n", + "\n", + "print(f\"\\nAnalysis parameters:\")\n", + "print(f\" Detection window shift: {detection_window_shift_ms} ms\")\n", + "print(f\" Event merge distance: {event_merge_distance_ms} ms\")\n", + "print(f\" Event threshold: ±5 x MAD-based standard deviation\")" + ] + }, + { + "cell_type": "markdown", + "id": "5cc9930f", + "metadata": {}, + "source": [ + "# Acetylcholine Biosensor Experiments - GRABACh3.0 Imaging\n", + "\n", + "## NWB Organization for Biosensor Data\n", + "\n", + "GRABACh3.0 acetylcholine biosensor experiments measure cholinergic signaling dynamics using genetically encoded fluorescent sensors. These experiments reveal state-dependent modulation of acetylcholine release in Parkinson's disease and levodopa-induced dyskinesia.\n", + "\n", + "**Key experimental parameters:**\n", + "- **Biosensor**: GRABACh3.0 genetically encoded acetylcholine indicator\n", + "- **Imaging**: Two-photon microscopy at 920nm excitation, 520nm emission \n", + "- **Target region**: Dorsal striatum\n", + "- **Stimulation**: Electrical stimulation to evoke acetylcholine release\n", + "- **Experimental conditions**: Control (CTRL), Parkinsonian (PD/6-OHDA), Dyskinetic off-state (OFF)\n", + "\n", + "**Data stored in `processing/ophys` module:**\n", + "- **ROI Response Series**: Processed fluorescence traces from regions of interest containing biosensor signal\n", + "- **Plane Segmentation**: ROI definitions showing areas of biosensor expression\n", + "- **Trials Table**: Experimental metadata including stimulation parameters, treatments, and timing\n", + "\n", + "**Analysis workflow:**\n", + "1. Extract fluorescence time series from ROI response series\n", + "2. Apply background subtraction and baseline normalization\n", + "3. Calculate area under curve (AUC) for stimulus-evoked responses\n", + "4. Normalize using calibration values (Fmax from acetylcholine, Fmin from TTX)\n", + "5. Compare responses across experimental conditions\n", + "\n", + "### Filtering for Acetylcholine Biosensor Data from Figure 5\n", + "\n", + "These experiments examine acetylcholine release dynamics across three experimental conditions that model different stages of Parkinson's disease pathology:\n", + "- **CTRL**: Control unlesioned mice with normal cholinergic signaling\n", + "- **PD**: Parkinsonian mice (6-OHDA lesioned, no levodopa treatment) showing reduced cholinergic tone\n", + "- **OFF**: Dyskinetic mice in off-state (24-48h after chronic levodopa) with altered cholinergic dynamics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e893fdf", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter for acetylcholine biosensor experiments from Figure 5\n", + "# Figure 5 examines acetylcholine release using GRABACh3.0 biosensor across disease states\n", + "criteria_oepsc = lambda asset: is_figure_number(get_session_id(asset.path), \"F5\")\n", + "\n", + "available_oepsc_assets = [asset for asset in assets_list if criteria_oepsc(asset)]\n", + "\n", + "print(f\"Found {len(available_oepsc_assets)} acetylcholine biosensor assets from Figure 5:\")\n", + "\n", + "# Display metadata for each NWB file to understand experimental design\n", + "for i, asset in enumerate(available_oepsc_assets[:]): \n", + " session_id = get_session_id(asset.path)\n", + " \n", + " # Parse session ID components:\n", + " # F5 = Figure 5, AChFP = Acetylcholine Fluorescent Protein (GRABACh3.0)\n", + " # pan = pan-striatal expression, CTRL/PD/OFF = experimental condition\n", + " print(f\" {i+1}. {asset.path}\")\n", + " print(f\" Session: {session_id}\")\n", + " print(f\" Cell type: {get_cell_type(session_id)}\") # Shows biosensor expression pattern\n", + " print(f\" State: {get_state(session_id)}\") # CTRL, PD (6-OHDA), or OFF (dyskinetic)\n", + " print(f\" Genotype: {get_genotype(session_id)}\") # WT mice with biosensor expression\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "od382alcdfg", + "metadata": {}, + "source": [ + "### Streaming Acetylcholine Biosensor Data\n", + "\n", + "The NWB files contain fluorescence time series from GRABACh3.0 biosensor experiments. Each file includes:\n", + "- **Trials table**: Metadata about stimulation timing, treatment conditions, and experimental parameters\n", + "- **ROI Response Series**: Processed fluorescence traces from regions of interest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "tlagzx4bd9o", + "metadata": {}, + "outputs": [], + "source": [ + "import h5py\n", + "import remfile\n", + "from pynwb import NWBHDF5IO\n", + "\n", + "asset = available_oepsc_assets[0]\n", + "s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n", + "file_system = remfile.File(s3_url)\n", + "file = h5py.File(file_system, mode=\"r\")\n", + "\n", + "io = NWBHDF5IO(file=file)\n", + "nwbfile = io.read()\n", + "nwbfile" + ] + }, + { + "cell_type": "markdown", + "id": "722190f7", + "metadata": {}, + "source": [ + "We can extract the trials table as a data frame with the following method and inspect it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca196ce9", + "metadata": {}, + "outputs": [], + "source": [ + "trials_df = nwbfile.trials.to_dataframe()\n", + "trials_df" + ] + }, + { + "cell_type": "markdown", + "id": "8d6ba069", + "metadata": {}, + "source": [ + "And then we can see the structure of the session. Each trial is characterized by a treatment (control, quinpirole, sulpride, etc) a type of stimulation (single_pulse, burst or calibration) and also contains the name of the ROIResponseSeries that corresponds to this." + ] + }, + { + "cell_type": "markdown", + "id": "f7dc6ea3", + "metadata": {}, + "source": [ + "### Accessing Fluorescence Time Series Data\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70b6913b", + "metadata": {}, + "outputs": [], + "source": [ + "fluorescence_module = nwbfile.processing[\"ophys\"][\"Fluorescence\"]\n", + "fluorescence_module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27b0d3b1", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "trials_df = nwbfile.trials.to_dataframe()\n", + "trial_index = 10\n", + "trial_row = trials_df.iloc[trial_index]\n", + "stimulus_start_time = trial_row[\"stimulus_start_time\"]\n", + "treatment = trial_row[\"treatment\"]\n", + "roi_response_series_name = trial_row[\"roi_series_name\"]\n", + "stimulation = trial_row[\"stimulation\"]\n", + "\n", + "roi_response_series = fluorescence_module[roi_response_series_name]\n", + "\n", + "\n", + "timestamps = roi_response_series.get_timestamps()\n", + "data = roi_response_series.data[:]\n", + "\n", + "plt.plot(timestamps, data)\n", + "plt.axvline(stimulus_start_time, color='r', linestyle='--')\n", + "plt.title(f\"{treatment} - {stimulation}\")\n", + "plt.ylabel(\"Fluorescence (a.u.)\")\n", + "plt.xlabel(\"Time (s)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4db8dd4", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "# pick 6 random trials that have a stimulus_start_time\n", + "valid_idx = trials_df[trials_df[\"stimulus_start_time\"].notna()].index.values\n", + "rng = np.random.default_rng(42)\n", + "chosen = rng.choice(valid_idx, size=6, replace=False)\n", + "\n", + "fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=False, sharey=True)\n", + "axes = axes.ravel()\n", + "\n", + "# compute global y-limits across the chosen series (skip missing)\n", + "y_mins, y_maxs = [], []\n", + "for tid in chosen:\n", + " row = trials_df.loc[tid]\n", + " roi_name = row[\"roi_series_name\"]\n", + " series = fluorescence_module[roi_name]\n", + "\n", + " y = series.data[:]\n", + " y_mins.append(np.nanmin(y))\n", + " y_maxs.append(np.nanmax(y))\n", + "\n", + "ymin = float(np.min(y_mins))\n", + "ymax = float(np.max(y_maxs))\n", + "pad = 0.05 * (ymax - ymin) if ymax > ymin else 1.0\n", + "ymin -= pad\n", + "ymax += pad\n", + "\n", + "for ax, tid in zip(axes, chosen):\n", + " row = trials_df.loc[tid]\n", + " roi_name = row[\"roi_series_name\"]\n", + " stim_t = float(row[\"stimulus_start_time\"])\n", + " series = fluorescence_module[roi_name]\n", + "\n", + " ts = series.get_timestamps()\n", + " y = series.data[:]\n", + " rel_ts = ts - stim_t\n", + "\n", + " ax.plot(rel_ts, y, color=\"C0\", lw=1)\n", + " ax.axvline(0.0, color=\"r\", linestyle=\"--\", lw=1)\n", + " ax.set_title(f\"Observation {tid}: {row['treatment']} — {row['stimulation']}\", fontsize=10)\n", + " ax.set_xlabel(\"Time from stimulus (s)\")\n", + " ax.set_ylabel(\"Fluorescence (a.u.)\")\n", + " ax.set_ylim(ymin, ymax)\n", + "\n", + "# overall layout\n", + "plt.tight_layout(rect=[0, 0, 1, 0.96])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d840665e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "226b85d3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21c4724a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05dd3079", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "surmeier-lab-to-nwb", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From e14dedc97457b64560440ec62133e69bdc8fc0a8 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Wed, 3 Sep 2025 13:34:54 -0600 Subject: [PATCH 2/2] fix typos --- .../zhai_2025/figure_1E_dspn_somatic_excitability.ipynb | 6 +++--- .../catalyst_neuro/zhai_2025/how_to_use_this_dataset.ipynb | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/001538/catalyst_neuro/zhai_2025/figure_1E_dspn_somatic_excitability.ipynb b/001538/catalyst_neuro/zhai_2025/figure_1E_dspn_somatic_excitability.ipynb index b23a9ed..086038e 100644 --- a/001538/catalyst_neuro/zhai_2025/figure_1E_dspn_somatic_excitability.ipynb +++ b/001538/catalyst_neuro/zhai_2025/figure_1E_dspn_somatic_excitability.ipynb @@ -176,7 +176,7 @@ ") -> int:\n", " \"\"\"Threshold-crossing spike count within a 500 ms stimulus window.\n", "\n", - " Matches approach in the original analysis; uses 200–700 ms from sweep start.\n", + " Matches approach in the original analysis; uses 200-700 ms from sweep start.\n", " \"\"\"\n", " if voltage_trace_mV.size == 0:\n", " return 0\n", @@ -321,8 +321,8 @@ " # Process each recording sweep\n", " for _, row in rec_df.iterrows():\n", " # Get protocol step and current\n", - " protocol_step = row.get((\"intracellular_recordings\", \"protocol_step\"), None)\n", - " current_pA = row.get((\"intracellular_recordings\", \"stimulus_current_pA\"), None)\n", + " protocol_step = row.get((\"intracellular_recordings\", \"protocol_step\"))\n", + " current_pA = row.get((\"intracellular_recordings\", \"stimulus_current_pA\"))\n", " \n", " response_ref = row[(\"responses\", \"response\")]\n", " ts = response_ref.timeseries\n", diff --git a/001538/catalyst_neuro/zhai_2025/how_to_use_this_dataset.ipynb b/001538/catalyst_neuro/zhai_2025/how_to_use_this_dataset.ipynb index e3a843f..320df12 100644 --- a/001538/catalyst_neuro/zhai_2025/how_to_use_this_dataset.ipynb +++ b/001538/catalyst_neuro/zhai_2025/how_to_use_this_dataset.ipynb @@ -22,7 +22,7 @@ "id": "1ba0f2bb", "metadata": {}, "source": [ - "One of the nice features of dandi is that the data can be streamed directly anywhere from the world without download (altought data can be loaded as well). To enable this stremaing workflow we need to instantiate a dandi client.\n" + "One of the nice features of dandi is that the data can be streamed directly anywhere from the world without download (although data can be loaded as well). To enable this streaming workflow we need to instantiate a dandi client.\n" ] }, { @@ -419,7 +419,7 @@ "metadata": {}, "source": [ "Note that the timestamps of the units are different as they\n", - "are recorded at different times during the experiment. All the timestamps in NWB are aligned to the same time base (e.g., the start of the recording session). We will see how to align these timestamps in a same base for comparision further down" + "are recorded at different times during the experiment. All the timestamps in NWB are aligned to the same time base (e.g., the start of the recording session). We will see how to align these timestamps in a same base for comparison further down" ] }, {