Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions demo/APO Sample Existing Demo.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "75f106cc",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../')\n",
"\n",
"print(sys.path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "117a382f",
"metadata": {},
"outputs": [],
"source": [
"import obsidian\n",
"import pandas as pd\n",
"import numpy as np\n",
"print(f'obsidian version: ' + obsidian.__version__)\n",
"\n",
"from obsidian.experiment import AdvExpDesigner\n",
"from obsidian.experiment.sampling import sample_with_bias, best_sample"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cd0a6b0",
"metadata": {},
"outputs": [],
"source": [
"#generate random data for this demo\n",
"np.random.seed(42)\n",
"\n",
"n = 1000\n",
"demo_data = pd.DataFrame({\n",
" 'reagent_conc': np.round(np.random.uniform(0.1, 1.0, n), 2),\n",
" 'ionic_strength': np.round(np.random.uniform(10, 100, n), 2),\n",
" 'surfactant_conc': np.round(np.random.uniform(0.01, 0.2, n), 3),\n",
" 'compound_A': np.round(np.random.uniform(0, 50, n), 2),\n",
" 'compound_B': np.round(np.random.uniform(0, 50, n), 2),\n",
" 'sugar': np.random.choice(['glucose', 'fructose', 'sucrose'], n),\n",
" 'surfactant': np.random.choice(['SDS', 'Tween20', 'TritonX'], n),\n",
" 'buffer': np.random.choice(['PBS', 'Tris', 'HEPES'], n),\n",
" 'pH': np.round(np.random.uniform(5.5, 8.5, n), 2)\n",
"})\n",
"\n",
"demo_data.index.name = 'FormulationID'\n",
"demo_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "57ed0226",
"metadata": {},
"outputs": [],
"source": [
"#Initialize existing experimental data as an AdvExpDesigner object\n",
"designer = AdvExpDesigner(design_df=demo_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9feae24b",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"You can sample an existing dataset with or without bias: \n",
"Bias dictionary format : {\"column\": [lower_bound, upper_bound, relative_weight]}\n",
"\n",
"- Weight >1 increases sampling probability for in-range rows.\n",
"- Weight <1 decreases it.\n",
"- Weight = 0 excludes those rows entirely.\n",
"\"\"\"\n",
"\n",
"bias = {\n",
" \"ionic_strength\": [50, 60, 3.0], \n",
"}\n",
"\n",
"seed = np.random.randint(0,1000)\n",
"print(f\"Random seed for reproducibility: {seed}\")\n",
"\n",
"# Draw a weighted random sample of n rows using built-in pandas functionality.\n",
"# enforce=True forces the bias bounds to be respected; the resulting sample may not be space-filling.\n",
"sample = sample_with_bias(designer.design, n=1000, replace=False, seed=seed, bias=bias, plot_weights=True, enforce=False)\n",
"\n",
"sample"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab457ed8",
"metadata": {},
"outputs": [],
"source": [
"# One-hot encode the categorical columns so they can be used in Euclidean distance calculations\n",
"df_encoded = pd.get_dummies(designer.design, columns=[\"sugar\", \"surfactant\", \"buffer\"], dtype=int) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a46bcd0",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Perform random sampling n_trials times and keep the best sample according to the chosen metric.\n",
"metric:\n",
" - \"maximin\": maximize the minimum pairwise Euclidean distance\n",
" - \"mean_nn\": maximize the mean nearest-neighbor Euclidean distance\n",
" - \"hybrid\": 0.6*maximin + 0.4*mean_nn \n",
"\"\"\"\n",
"seed = np.random.randint(0,1000)\n",
"print(f\"Random seed for reproducibility: {seed}\")\n",
"\n",
"optimal_sample, info = best_sample(\n",
" df_encoded, 10, feature_cols=df_encoded.columns, n_trials=1000,\n",
" bias=bias, plot_weights=True, enforce=False, random_state=seed, metric=\"hybrid\"\n",
")\n",
"\n",
"print(info)\n",
"optimal_sample\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ea1bcd8",
"metadata": {},
"outputs": [],
"source": [
"#decode from one-hot encoding\n",
"normal_cols = list(optimal_sample.columns)[0:6]\n",
"encoded_cols = list(optimal_sample.columns)[6:]\n",
"decoded = pd.from_dummies(optimal_sample[encoded_cols],sep=\"_\")\n",
"optimal_design_decoded = pd.concat([optimal_sample[normal_cols], decoded], axis=1)\n",
"optimal_design_decoded"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2137e315",
"metadata": {},
"outputs": [],
"source": [
"print(designer.plot_histograms(optimal_design_decoded))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv (3.13.5)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
55 changes: 29 additions & 26 deletions obsidian/experiment/advanced_design.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,32 @@ class AdvExpDesigner:
"""

def __init__(
self, continuous_params, conditional_subparameters, subparam_mapping=None
self, continuous_params=None, conditional_subparameters=None, subparam_mapping=None, design_df=None
):
"""
Initializes the AdvExpDesigner with experimental parameters and optional subparameter mappings.

:param continuous_params: A dictionary containing the continuous parameters for the design.
:param conditional_subparameters: A dictionary containing the conditional subparameters for the design.
:param subparam_mapping: A dictionary mapping each categorical parameter to the name of its subparameter; inferred from conditional_subparameters if not provided.
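:param design_df: A pandas DataFrame of an existing experimental design, default None. When given and non-empty it is stored as self.design; categorical keys are taken from its non-numeric columns, and continuous keys from its numeric columns unless continuous_params is provided.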
"""
self.continuous_params = continuous_params
self.conditional_subparameters = conditional_subparameters
self.subparam_mapping = subparam_mapping or infer_subparam_mapping(
self.conditional_subparameters
)
self.continuous_keys = list(self.continuous_params.keys())
self.categorical_keys = list(self.conditional_subparameters.keys())
self.subparam_key = (
list(self.subparam_mapping.values())[0] if self.subparam_mapping else None
)
self.continuous_params = continuous_params if continuous_params else {}
self.conditional_subparameters = conditional_subparameters if conditional_subparameters else {}

if design_df is not None and not design_df.empty:
self.design = design_df
self.categorical_keys = design_df.select_dtypes(exclude=['number']).columns.tolist()
if continuous_params:
self.continuous_keys = list(self.continuous_params.keys())
else:
self.continuous_keys = design_df.select_dtypes(include=['number']).columns.tolist()
else:
self.continuous_keys = list(self.continuous_params.keys()) if continuous_params else []
self.categorical_keys = list(self.conditional_subparameters.keys()) if conditional_subparameters else []

self.subparam_mapping = subparam_mapping or infer_subparam_mapping(self.conditional_subparameters)
self.subparam_key = (list(self.subparam_mapping.values())[0] if self.subparam_mapping else None)

def generate_design(self, seed, n_samples, optimize_categories=True):
"""
Expand Down Expand Up @@ -426,13 +433,16 @@ def assign_conditional_subparameter(

def infer_subparam_mapping(conditional_subparameters):
mapping = {}
for cat_param, levels in conditional_subparameters.items():
subparam_candidates = set()
for level_info in levels.values():
subparams = [k for k in level_info if k != "freq"]
subparam_candidates.update(subparams)
if len(subparam_candidates) == 1:
mapping[cat_param] = subparam_candidates.pop()
if len(conditional_subparameters) == 0:
return mapping
else:
for cat_param, levels in conditional_subparameters.items():
subparam_candidates = set()
for level_info in levels.values():
subparams = [k for k in level_info if k != "freq"]
subparam_candidates.update(subparams)
if len(subparam_candidates) == 1:
mapping[cat_param] = subparam_candidates.pop()
return mapping


Expand Down Expand Up @@ -915,14 +925,7 @@ def plot_design_quality_evolution(metrics_df):
metrics_df = metrics_df.sort_values("seed")

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
metrics = [
"D-optimality",
"A-optimality",
"Pairwise Distance CV",
"Max Continuous Corr",
"Max Categorical Corr",
"score",
]
metrics = metrics_df.columns

for i, metric in enumerate(metrics):
ax = axes[i // 3, i % 3]
Expand Down
Loading
Loading