Update tutorial about clustering statistics

deep-introspection · deep-introspection · commit cd5bdced16cc · 2025-03-12T15:32:49.000-04:00
diff --git a/tutorial/UCLAWorkshop/06 - Statistics.ipynb b/tutorial/UCLAWorkshop/06 - Statistics.ipynb
@@ -29,6 +29,7 @@
     "import mne\n",
     "import matplotlib.pyplot as plt\n",
     "import scipy.sparse as sp\n",
+    "import scipy.stats as sp_stats\n",
     "from collections import OrderedDict\n",
     "\n",
     "# Import HyPyP modules\n",
@@ -140,10 +141,12 @@
    "source": [
     "# Simulate multiple samples (e.g., across subjects or trials)\n",
     "n_samples = 20\n",
-    "# Flatten the PSD for each participant; assume n_tests = number of channels\n",
-    "psd_flat = data_psd.reshape(2, -1)  # shape: (2, n_tests)\n",
+    "noise_level = 1e-6\n",
+    "\n",
+    "psd = data_psd\n",
+    "# Simulated effect: the last 10 entries get +noise_level, the first 21 stay unshifted (assumes psd.shape[1] == 31)\n",
     "# Create a sample data array by replicating and adding slight random noise\n",
-    "data_sample = np.array([psd_flat[0] + 1e-6 * np.random.randn(psd_flat.shape[1]) for _ in range(n_samples)])\n",
+    "data_sample = np.array([psd[0, :, 0] + noise_level * np.random.randn(psd.shape[1]) + noise_level * np.concatenate([np.zeros(21), np.ones(10)]) for _ in range(n_samples)])\n",
     "# Add a frequency dimension (here, 1 frequency bin)\n",
     "data_sample = data_sample[..., np.newaxis]  # shape: (n_samples, n_tests, 1)\n",
     "print(\"Data for statsCond shape:\", data_sample.shape)\n",
@@ -153,22 +156,9 @@
     "print(\"Permutation t-test completed.\")\n",
     "print(\"T_obs shape:\", T_obs.shape)\n",
     "\n",
-    "# Average T_obs over frequency bins to obtain one value per sensor\n",
-    "n_channels = len(preproc_S1.info['ch_names'])  # e.g., 31\n",
-    "n_freq = T_obs.shape[0] // n_channels  # here, 248/31 = 8\n",
-    "T_obs_avg = T_obs.reshape(n_channels, n_freq).mean(axis=1)\n",
-    "\n",
-    "# Similarly, average T_obs_plot if needed:\n",
-    "T_obs_plot_avg = T_obs_plot.reshape(n_channels, n_freq).mean(axis=1)\n",
-    "print(\"Averaged T_obs shape:\", T_obs_avg.shape)\n",
-    "\n",
     "# Plot sensor-level T-values (all sensors) using the averaged T-values\n",
-    "viz.plot_significant_sensors(T_obs_plot=T_obs_avg, epochs=preproc_S1)\n",
-    "print(\"Sensor-level T-values plotted (all sensors).\")\n",
-    "\n",
-    "# Plot only significant sensors (after FDR correction) using averaged values\n",
-    "viz.plot_significant_sensors(T_obs_plot=T_obs_plot_avg, epochs=preproc_S1)\n",
-    "print(\"Significant sensors T-values plotted (FDR corrected).\")"
+    "viz.plot_significant_sensors(T_obs_plot=T_obs, epochs=preproc_S1, significant=T_obs_plot)\n",
+    "print(\"Sensor-level T-values plotted (all sensors).\")"
    ]
   },
   {
@@ -182,6 +172,16 @@
     "We first compute an a priori connectivity matrix (using `con_matrix`) and convert it to a SciPy sparse matrix."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "729937df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "psd[0, :, 0]"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -190,15 +190,14 @@
    "outputs": [],
    "source": [
     "# Simulate two conditions by adding small noise to the PSD data\n",
-    "noise_level = 1e-6\n",
-    "data_condition1 = data_psd + noise_level * np.random.randn(*data_psd.shape)\n",
-    "data_condition2 = data_psd + noise_level * np.random.randn(*data_psd.shape) - 0.05  # slight difference\n",
+    "data_condition1 = np.array([psd[0, :, 0] + noise_level * np.random.randn(psd.shape[1]) for _ in range(n_samples)])\n",
+    "data_condition2 = np.array([psd[0, :, 0] + noise_level * np.random.randn(psd.shape[1]) + noise_level * np.concatenate([np.zeros(21), np.ones(10)]) for _ in range(n_samples)])  # condition 2 only: +noise_level on the last 10 of 31 entries\n",
     "\n",
     "# Create a list of data arrays (one per condition)\n",
     "data_list = [data_condition1, data_condition2]\n",
     "\n",
     "# Create connectivity matrix for a priori sensor connectivity using participant 1's sensor layout\n",
-    "con_matrixTuple = stats.con_matrix(preproc_S1, freqs_mean=psd1.freq_list, draw=True)\n",
+    "con_matrixTuple = stats.con_matrix(preproc_S1, freqs_mean=[psd1.freq_list[0]], draw=True)\n",
     "ch_con_freq = con_matrixTuple.ch_con_freq\n",
     "\n",
     "# Convert ch_con_freq to a SciPy sparse matrix (choose bsr or csr; here we use csr)\n",
@@ -207,7 +206,7 @@
     "# Run cluster-level permutation test (e.g., 5000 permutations, alpha=0.05)\n",
     "F_obs, clusters, cluster_p_values, H0_cluster, F_obs_plot = stats.statscondCluster(\n",
     "    data_list,\n",
-    "    freqs_mean=psd1.freq_list,\n",
+    "    freqs_mean=[psd1.freq_list[0]],\n",
     "    ch_con_freq=ch_con_freq_sparse,\n",
     "    tail=0,\n",
     "    n_permutations=5000,\n",
@@ -218,14 +217,8 @@
     "print(\"F_obs shape:\", F_obs.shape)\n",
     "print(\"Number of clusters found:\", len(cluster_p_values))\n",
     "\n",
-    "# Average F_obs_plot over frequency bins to obtain one value per sensor\n",
-    "F_obs_plot_avg = F_obs_plot.mean(axis=1)\n",
     "plt.figure()\n",
-    "plt.bar(range(len(F_obs_plot_avg)), F_obs_plot_avg)\n",
-    "plt.xlabel(\"Test index (sensor)\")\n",
-    "plt.ylabel(\"F-statistic\")\n",
-    "plt.title(\"Significant Cluster F-statistics (after correction)\")\n",
-    "plt.show()"
+    "viz.plot_significant_sensors(T_obs_plot=F_obs, epochs=preproc_S1, significant=F_obs_plot)"
    ]
   },
   {
@@ -239,6 +232,16 @@
     "We use the connectivity results (here simulated as `result_intra` and `values`) from previous analyses."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8cf01121",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "result_intra[0].shape"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -252,20 +255,37 @@
     "n_channels = len(preproc_S1.info['ch_names'])\n",
     "result_intra = [np.random.rand(n_channels, n_channels), np.random.rand(n_channels, n_channels)]\n",
     "\n",
+    "mask = np.zeros(result_intra[0].shape)\n",
+    "mask[26:, 22:] = 1  # rows 26+ and columns 22+ receive the simulated effect\n",
+    "\n",
     "# Create fake groups for intra-brain connectivity analysis by replicating and adding slight noise\n",
     "Alpha_Low = [\n",
-    "    np.array([result_intra[0] + np.random.normal(0, noise_level, result_intra[0].shape),\n",
-    "              result_intra[0] + np.random.normal(0, noise_level, result_intra[0].shape)]),\n",
+    "    np.array([result_intra[0] + np.random.normal(0, noise_level, result_intra[0].shape) + noise_level * mask,\n",
+    "              result_intra[0] + np.random.normal(0, noise_level, result_intra[0].shape) + noise_level * mask,\n",
+    "              result_intra[0] + np.random.normal(0, noise_level, result_intra[0].shape) + noise_level * mask,\n",
+    "              result_intra[0] + np.random.normal(0, noise_level, result_intra[0].shape) + noise_level * mask,\n",
+    "              result_intra[0] + np.random.normal(0, noise_level, result_intra[0].shape) + noise_level * mask]),\n",
     "    np.array([result_intra[1] + np.random.normal(0, noise_level, result_intra[1].shape),\n",
+    "              result_intra[1] + np.random.normal(0, noise_level, result_intra[1].shape),\n",
+    "              result_intra[1] + np.random.normal(0, noise_level, result_intra[1].shape),\n",
+    "              result_intra[1] + np.random.normal(0, noise_level, result_intra[1].shape),\n",
     "              result_intra[1] + np.random.normal(0, noise_level, result_intra[1].shape)])\n",
     "]\n",
     "\n",
     "# Create fake groups for inter-brain connectivity analysis.\n",
     "# For demonstration, assume \"values\" is a connectivity matrix (e.g., from inter-brain analysis) of shape (n_channels, n_channels)\n",
     "values = np.random.rand(n_channels, n_channels)\n",
     "data_inter = [\n",
-    "    np.array([values, values + np.random.normal(0, noise_level, values.shape)]),\n",
-    "    np.array([result_intra[0], result_intra[0] + np.random.normal(0, noise_level, result_intra[0].shape)])\n",
+    "    np.array([values,\n",
+    "              values,\n",
+    "              values,\n",
+    "              values,\n",
+    "              values]),\n",
+    "    np.array([values + np.random.normal(0, noise_level, values.shape),\n",
+    "              values + np.random.normal(0, noise_level, values.shape),\n",
+    "              values + np.random.normal(0, noise_level, values.shape),\n",
+    "              values + np.random.normal(0, noise_level, values.shape),\n",
+    "              values + np.random.normal(0, noise_level, values.shape)])\n",
     "]\n",
     "\n",
     "print(\"Fake groups for connectivity analysis created. Shapes:\",\n",
@@ -308,6 +328,26 @@
     "print(\"Intra-brain connectivity cluster test completed.\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "55695d0c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cluster_p_values_intra"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9da75ec5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "viz.viz_2D_topomap_intra(preproc_S1, preproc_S2, F_obs_plot_intra, F_obs_plot_intra, threshold='auto', steps=10, lab=True)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "7d9d8185-0b39-4ca9-a347-2c2d54754a89",
@@ -336,6 +376,26 @@
     "print(\"Inter-brain connectivity cluster test completed.\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b61140f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cluster_p_values_inter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6e85e25",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "viz.viz_2D_topomap_inter(preproc_S1, preproc_S2, F_obs_plot_inter, threshold='auto', steps=10, lab=True)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "7c3f53d3-61f7-45bd-95c4-dee64ec82c74",