diff --git a/Playground/AidanPage_T126/.gitignore b/Playground/AidanPage_T126/.gitignore
new file mode 100644
index 0000000000..f45af34b8b
--- /dev/null
+++ b/Playground/AidanPage_T126/.gitignore
@@ -0,0 +1,2 @@
+venv/
+data/
diff --git a/Playground/AidanPage_T126/notebooks/01_data_exploration.ipynb b/Playground/AidanPage_T126/notebooks/01_data_exploration.ipynb
new file mode 100644
index 0000000000..254cf5cb9d
--- /dev/null
+++ b/Playground/AidanPage_T126/notebooks/01_data_exploration.ipynb
@@ -0,0 +1,1595 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "27d2046c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pandas: 2.3.3\n",
+ "geopandas: 1.0.1\n",
+ "Setup complete!\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "print(\"pandas:\", pd.__version__)\n",
+ "print(\"geopandas:\", gpd.__version__)\n",
+ "print(\"Setup complete!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b71d78ad",
+ "metadata": {},
+ "source": [
+ "Dataset 1: Urban Forest Tree Species"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "3f3bbc31",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape: (82064, 20)\n",
+ "\n",
+ "Column names:\n",
+ "['com_id', 'common_name', 'scientific_name', 'genus', 'family', 'diameter_breast_height', 'year_planted', 'date_planted', 'age_description', 'useful_life_expectency', 'useful_life_expectency_value', 'precinct', 'located_in', 'uploaddate', 'latitude', 'longitude', 'easting', 'northing', 'geolocation', 'geometry']\n",
+ "\n",
+ "Data types:\n",
+ "com_id object\n",
+ "common_name object\n",
+ "scientific_name object\n",
+ "genus object\n",
+ "family object\n",
+ "diameter_breast_height float64\n",
+ "year_planted object\n",
+ "date_planted datetime64[ms]\n",
+ "age_description object\n",
+ "useful_life_expectency object\n",
+ "useful_life_expectency_value int32\n",
+ "precinct object\n",
+ "located_in object\n",
+ "uploaddate object\n",
+ "latitude float64\n",
+ "longitude float64\n",
+ "easting object\n",
+ "northing object\n",
+ "geolocation object\n",
+ "geometry geometry\n",
+ "dtype: object\n",
+ "\n",
+ "First 5 rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " com_id | \n",
+ " common_name | \n",
+ " scientific_name | \n",
+ " genus | \n",
+ " family | \n",
+ " diameter_breast_height | \n",
+ " year_planted | \n",
+ " date_planted | \n",
+ " age_description | \n",
+ " useful_life_expectency | \n",
+ " useful_life_expectency_value | \n",
+ " precinct | \n",
+ " located_in | \n",
+ " uploaddate | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " easting | \n",
+ " northing | \n",
+ " geolocation | \n",
+ " geometry | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1070378 | \n",
+ " Tulip Tree | \n",
+ " Liriodendron tulipifera | \n",
+ " Liriodendron | \n",
+ " Magnoliaceae | \n",
+ " 20.0 | \n",
+ " 2006 | \n",
+ " 2006-12-15 | \n",
+ " Mature | \n",
+ " > 41 years | \n",
+ " 50 | \n",
+ " South Yarra | \n",
+ " Street | \n",
+ " None | \n",
+ " -37.832567 | \n",
+ " 144.986879 | \n",
+ " 322843.14 | \n",
+ " 5810852.35 | \n",
+ " { \"lon\": 144.98687896999999, \"lat\": -37.832567... | \n",
+ " POINT (144.98688 -37.83257) | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1070382 | \n",
+ " Tulip Tree | \n",
+ " Liriodendron tulipifera | \n",
+ " Liriodendron | \n",
+ " Magnoliaceae | \n",
+ " 21.0 | \n",
+ " 2006 | \n",
+ " 2006-12-15 | \n",
+ " Mature | \n",
+ " > 41 years | \n",
+ " 50 | \n",
+ " South Yarra | \n",
+ " Street | \n",
+ " None | \n",
+ " -37.831669 | \n",
+ " 144.987059 | \n",
+ " 322856.8 | \n",
+ " 5810952.41 | \n",
+ " { \"lon\": 144.98705856999999, \"lat\": -37.831668... | \n",
+ " POINT (144.98706 -37.83167) | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1796650 | \n",
+ " Cook pine | \n",
+ " Araucaria columnaris | \n",
+ " Araucaria | \n",
+ " Araucariaceae | \n",
+ " NaN | \n",
+ " 2020 | \n",
+ " 2020-12-14 | \n",
+ " Semi-mature | \n",
+ " 21 - 30 years | \n",
+ " 30 | \n",
+ " Carlton | \n",
+ " Park | \n",
+ " None | \n",
+ " -37.802222 | \n",
+ " 144.962852 | \n",
+ " 320655.27 | \n",
+ " 5814177.03 | \n",
+ " { \"lon\": 144.96285247, \"lat\": -37.80222191 } | \n",
+ " POINT (144.96285 -37.80222) | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1457913 | \n",
+ " Yellow Box | \n",
+ " Eucalyptus melliodora | \n",
+ " Eucalyptus | \n",
+ " Myrtaceae | \n",
+ " 25.0 | \n",
+ " 2010 | \n",
+ " 2010-12-14 | \n",
+ " Mature | \n",
+ " > 41 years | \n",
+ " 50 | \n",
+ " Kensington | \n",
+ " Park | \n",
+ " None | \n",
+ " -37.797537 | \n",
+ " 144.923519 | \n",
+ " 317180.37 | \n",
+ " 5814617.41 | \n",
+ " { \"lon\": 144.92351884999999, \"lat\": -37.797537... | \n",
+ " POINT (144.92352 -37.79754) | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1457915 | \n",
+ " Yellow Box | \n",
+ " Eucalyptus melliodora | \n",
+ " Eucalyptus | \n",
+ " Myrtaceae | \n",
+ " 22.0 | \n",
+ " 2010 | \n",
+ " 2010-12-14 | \n",
+ " Mature | \n",
+ " > 41 years | \n",
+ " 50 | \n",
+ " Kensington | \n",
+ " Park | \n",
+ " None | \n",
+ " -37.797540 | \n",
+ " 144.923459 | \n",
+ " 317175.13 | \n",
+ " 5814617.01 | \n",
+ " { \"lon\": 144.92345922000001, \"lat\": -37.797539... | \n",
+ " POINT (144.92346 -37.79754) | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " com_id common_name scientific_name genus family \\\n",
+ "0 1070378 Tulip Tree Liriodendron tulipifera Liriodendron Magnoliaceae \n",
+ "1 1070382 Tulip Tree Liriodendron tulipifera Liriodendron Magnoliaceae \n",
+ "2 1796650 Cook pine Araucaria columnaris Araucaria Araucariaceae \n",
+ "3 1457913 Yellow Box Eucalyptus melliodora Eucalyptus Myrtaceae \n",
+ "4 1457915 Yellow Box Eucalyptus melliodora Eucalyptus Myrtaceae \n",
+ "\n",
+ " diameter_breast_height year_planted date_planted age_description \\\n",
+ "0 20.0 2006 2006-12-15 Mature \n",
+ "1 21.0 2006 2006-12-15 Mature \n",
+ "2 NaN 2020 2020-12-14 Semi-mature \n",
+ "3 25.0 2010 2010-12-14 Mature \n",
+ "4 22.0 2010 2010-12-14 Mature \n",
+ "\n",
+ " useful_life_expectency useful_life_expectency_value precinct \\\n",
+ "0 > 41 years 50 South Yarra \n",
+ "1 > 41 years 50 South Yarra \n",
+ "2 21 - 30 years 30 Carlton \n",
+ "3 > 41 years 50 Kensington \n",
+ "4 > 41 years 50 Kensington \n",
+ "\n",
+ " located_in uploaddate latitude longitude easting northing \\\n",
+ "0 Street None -37.832567 144.986879 322843.14 5810852.35 \n",
+ "1 Street None -37.831669 144.987059 322856.8 5810952.41 \n",
+ "2 Park None -37.802222 144.962852 320655.27 5814177.03 \n",
+ "3 Park None -37.797537 144.923519 317180.37 5814617.41 \n",
+ "4 Park None -37.797540 144.923459 317175.13 5814617.01 \n",
+ "\n",
+ " geolocation \\\n",
+ "0 { \"lon\": 144.98687896999999, \"lat\": -37.832567... \n",
+ "1 { \"lon\": 144.98705856999999, \"lat\": -37.831668... \n",
+ "2 { \"lon\": 144.96285247, \"lat\": -37.80222191 } \n",
+ "3 { \"lon\": 144.92351884999999, \"lat\": -37.797537... \n",
+ "4 { \"lon\": 144.92345922000001, \"lat\": -37.797539... \n",
+ "\n",
+ " geometry \n",
+ "0 POINT (144.98688 -37.83257) \n",
+ "1 POINT (144.98706 -37.83167) \n",
+ "2 POINT (144.96285 -37.80222) \n",
+ "3 POINT (144.92352 -37.79754) \n",
+ "4 POINT (144.92346 -37.79754) "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Load Data (Urban Forest Tree Dataset)\n",
+ "trees = gpd.read_file(\"../data/raw/trees-with-species-and-dimensions-urban-forest.geojson\")\n",
+ "\n",
+ "# Basic shape and info\n",
+ "print(f\"Shape: {trees.shape}\")\n",
+ "print(f\"\\nColumn names:\\n{trees.columns.tolist()}\")\n",
+ "print(f\"\\nData types:\\n{trees.dtypes}\")\n",
+ "print(f\"\\nFirst 5 rows:\")\n",
+ "trees.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "a43a9882",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Missing values:\n",
+ "com_id 0\n",
+ "common_name 0\n",
+ "scientific_name 0\n",
+ "genus 0\n",
+ "family 0\n",
+ "diameter_breast_height 44863\n",
+ "year_planted 0\n",
+ "date_planted 0\n",
+ "age_description 0\n",
+ "useful_life_expectency 17410\n",
+ "useful_life_expectency_value 0\n",
+ "precinct 0\n",
+ "located_in 0\n",
+ "uploaddate 82064\n",
+ "latitude 0\n",
+ "longitude 0\n",
+ "easting 0\n",
+ "northing 0\n",
+ "geolocation 0\n",
+ "geometry 0\n",
+ "dtype: int64\n",
+ "\n",
+ "Missing value percentages:\n",
+ "com_id 0.0\n",
+ "common_name 0.0\n",
+ "scientific_name 0.0\n",
+ "genus 0.0\n",
+ "family 0.0\n",
+ "diameter_breast_height 54.7\n",
+ "year_planted 0.0\n",
+ "date_planted 0.0\n",
+ "age_description 0.0\n",
+ "useful_life_expectency 21.2\n",
+ "useful_life_expectency_value 0.0\n",
+ "precinct 0.0\n",
+ "located_in 0.0\n",
+ "uploaddate 100.0\n",
+ "latitude 0.0\n",
+ "longitude 0.0\n",
+ "easting 0.0\n",
+ "northing 0.0\n",
+ "geolocation 0.0\n",
+ "geometry 0.0\n",
+ "dtype: float64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Missing values\n",
+ "print(f\"Missing values:\\n{trees.isnull().sum()}\")\n",
+ "print(f\"\\nMissing value percentages:\\n{(trees.isnull().sum() / len(trees) * 100).round(1)}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "0db23ad6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Useful Life Expectancy:\n",
+ "useful_life_expectency\n",
+ "> 41 years 22604\n",
+ "21 - 30 years 21115\n",
+ "31 - 40 years 15989\n",
+ "11 - 20 years 4283\n",
+ "< 10 years 663\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "Age Description:\n",
+ "age_description\n",
+ "Mature 51090\n",
+ "Semi-mature 21699\n",
+ "Unestablished 9260\n",
+ "Planter box 15\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Key categorical distributions\n",
+ "print(\"Useful Life Expectancy:\")\n",
+ "print(trees['useful_life_expectency'].value_counts())\n",
+ "print(f\"\\nAge Description:\")\n",
+ "print(trees['age_description'].value_counts())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "16973a58",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Useful Life Expectancy Value:\n",
+ "count 82064.000000\n",
+ "mean 41.016767\n",
+ "std 10.079900\n",
+ "min 10.000000\n",
+ "25% 30.000000\n",
+ "50% 40.000000\n",
+ "75% 50.000000\n",
+ "max 50.000000\n",
+ "Name: useful_life_expectency_value, dtype: float64\n",
+ "\n",
+ "Value counts:\n",
+ "useful_life_expectency_value\n",
+ "10 663\n",
+ "20 4283\n",
+ "30 21115\n",
+ "40 15989\n",
+ "50 40014\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Useful Life Expectancy Value:\")\n",
+ "print(trees['useful_life_expectency_value'].describe())\n",
+ "print(f\"\\nValue counts:\\n{trees['useful_life_expectency_value'].value_counts().sort_index()}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "c4f02ac2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Diameter Breast Height (non-null):\n",
+ "count 37201.000000\n",
+ "mean 35.471627\n",
+ "std 137.639141\n",
+ "min 1.000000\n",
+ "25% 18.000000\n",
+ "50% 30.000000\n",
+ "75% 45.000000\n",
+ "max 26027.000000\n",
+ "Name: diameter_breast_height, dtype: float64\n",
+ "\n",
+ "Top 10 species:\n",
+ "common_name\n",
+ "River red gum 8338\n",
+ "London Plane 5070\n",
+ "Drooping sheoak 3452\n",
+ "English Elm 3291\n",
+ "Yellow Box 3043\n",
+ "Black Wattle 3001\n",
+ "Spotted Gum 2902\n",
+ "River Sheoak 1665\n",
+ "Lightwood Wattle 1619\n",
+ "Sweet Bursaria 1574\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# DBH distribution (for non-missing values)\n",
+ "print(\"Diameter Breast Height (non-null):\")\n",
+ "print(trees['diameter_breast_height'].describe())\n",
+ "\n",
+ "print(f\"\\nTop 10 species:\")\n",
+ "print(trees['common_name'].value_counts().head(10))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "01137143",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Latitude range: -37.8505 to -37.7755\n",
+ "Longitude range: 144.9004 to 144.9911\n",
+ "\n",
+ "Precinct distribution:\n",
+ "precinct\n",
+ "Parkville 28567\n",
+ "Melbourne 9063\n",
+ "Kensington 7555\n",
+ "West Melbourne 6756\n",
+ "Docklands 5816\n",
+ "North Melbourne 4715\n",
+ "East Melbourne 4506\n",
+ "Carlton 4368\n",
+ "South Yarra 2631\n",
+ "Port Melbourne 2623\n",
+ "Southbank 2338\n",
+ "Carlton North 2323\n",
+ "Flemington 510\n",
+ "North And West Melbourne 113\n",
+ "Princes Hill 65\n",
+ "Fishermans Bend 61\n",
+ "Central City 26\n",
+ "Brunswick West 24\n",
+ "Richmond 2\n",
+ "Brunswick 2\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Quick spatial check\n",
+ "print(f\"Latitude range: {trees['latitude'].min():.4f} to {trees['latitude'].max():.4f}\")\n",
+ "print(f\"Longitude range: {trees['longitude'].min():.4f} to {trees['longitude'].max():.4f}\")\n",
+ "print(f\"\\nPrecinct distribution:\")\n",
+ "print(trees['precinct'].value_counts())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6ee84c19",
+ "metadata": {},
+ "source": [
+ "Dataset 2: Microclimate Sensor Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "b3f25ab4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape: (591803, 16)\n",
+ "\n",
+ "Column names:\n",
+ "['Device_id', 'Time', 'SensorLocation', 'LatLong', 'MinimumWindDirection', 'AverageWindDirection', 'MaximumWindDirection', 'MinimumWindSpeed', 'AverageWindSpeed', 'GustWindSpeed', 'AirTemperature', 'RelativeHumidity', 'AtmosphericPressure', 'PM25', 'PM10', 'Noise']\n",
+ "\n",
+ "Data types:\n",
+ "Device_id object\n",
+ "Time object\n",
+ "SensorLocation object\n",
+ "LatLong object\n",
+ "MinimumWindDirection float64\n",
+ "AverageWindDirection float64\n",
+ "MaximumWindDirection float64\n",
+ "MinimumWindSpeed float64\n",
+ "AverageWindSpeed float64\n",
+ "GustWindSpeed float64\n",
+ "AirTemperature float64\n",
+ "RelativeHumidity float64\n",
+ "AtmosphericPressure float64\n",
+ "PM25 float64\n",
+ "PM10 float64\n",
+ "Noise float64\n",
+ "dtype: object\n",
+ "\n",
+ "First 5 rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Device_id | \n",
+ " Time | \n",
+ " SensorLocation | \n",
+ " LatLong | \n",
+ " MinimumWindDirection | \n",
+ " AverageWindDirection | \n",
+ " MaximumWindDirection | \n",
+ " MinimumWindSpeed | \n",
+ " AverageWindSpeed | \n",
+ " GustWindSpeed | \n",
+ " AirTemperature | \n",
+ " RelativeHumidity | \n",
+ " AtmosphericPressure | \n",
+ " PM25 | \n",
+ " PM10 | \n",
+ " Noise | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " ICTMicroclimate-09 | \n",
+ " 2025-11-16T13:17:20+11:00 | \n",
+ " SkyFarm (Jeff's Shed). Rooftop - Melbourne Con... | \n",
+ " -37.8223306, 144.9521696 | \n",
+ " 0.0 | \n",
+ " 308.0 | \n",
+ " 359.0 | \n",
+ " 0.0 | \n",
+ " 1.9 | \n",
+ " 6.6 | \n",
+ " 15.6 | \n",
+ " 71.6 | \n",
+ " 1000.100000 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 55.600000 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " ICTMicroclimate-06 | \n",
+ " 2025-11-16T03:17:34+11:00 | \n",
+ " Tram Stop 7B - Melbourne Tennis Centre Precinc... | \n",
+ " -37.8194993, 144.9787211 | \n",
+ " 0.0 | \n",
+ " 271.0 | \n",
+ " 359.0 | \n",
+ " 0.0 | \n",
+ " 0.4 | \n",
+ " 2.0 | \n",
+ " 15.2 | \n",
+ " 78.9 | \n",
+ " 1001.900000 | \n",
+ " 2.0 | \n",
+ " 3.0 | \n",
+ " 57.000000 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " ICTMicroclimate-03 | \n",
+ " 2025-11-16T13:14:29+11:00 | \n",
+ " CH1 rooftop | \n",
+ " -37.8140348, 144.96728 | \n",
+ " 0.0 | \n",
+ " 356.0 | \n",
+ " 350.0 | \n",
+ " 0.0 | \n",
+ " 0.8 | \n",
+ " 1.9 | \n",
+ " 15.7 | \n",
+ " 79.0 | \n",
+ " 995.700000 | \n",
+ " 2.0 | \n",
+ " 4.0 | \n",
+ " 61.400000 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ICTMicroclimate-03 | \n",
+ " 2025-08-18T01:10:39+10:00 | \n",
+ " CH1 rooftop | \n",
+ " -37.8140348, 144.96728 | \n",
+ " 169.0 | \n",
+ " 183.0 | \n",
+ " 193.0 | \n",
+ " 1.5 | \n",
+ " 1.7 | \n",
+ " 2.2 | \n",
+ " 9.1 | \n",
+ " 71.8 | \n",
+ " 1018.900000 | \n",
+ " 1.0 | \n",
+ " 3.0 | \n",
+ " 74.700000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ICTMicroclimate-01 | \n",
+ " 2025-11-16T03:23:06+11:00 | \n",
+ " Birrarung Marr Park - Pole 1131 | \n",
+ " -37.8185931, 144.9716404 | \n",
+ " NaN | \n",
+ " 341.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.2 | \n",
+ " NaN | \n",
+ " 14.6 | \n",
+ " 89.0 | \n",
+ " 1000.900024 | \n",
+ " 6.0 | \n",
+ " 9.0 | \n",
+ " 47.599998 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Device_id Time \\\n",
+ "0 ICTMicroclimate-09 2025-11-16T13:17:20+11:00 \n",
+ "1 ICTMicroclimate-06 2025-11-16T03:17:34+11:00 \n",
+ "2 ICTMicroclimate-03 2025-11-16T13:14:29+11:00 \n",
+ "3 ICTMicroclimate-03 2025-08-18T01:10:39+10:00 \n",
+ "4 ICTMicroclimate-01 2025-11-16T03:23:06+11:00 \n",
+ "\n",
+ " SensorLocation \\\n",
+ "0 SkyFarm (Jeff's Shed). Rooftop - Melbourne Con... \n",
+ "1 Tram Stop 7B - Melbourne Tennis Centre Precinc... \n",
+ "2 CH1 rooftop \n",
+ "3 CH1 rooftop \n",
+ "4 Birrarung Marr Park - Pole 1131 \n",
+ "\n",
+ " LatLong MinimumWindDirection AverageWindDirection \\\n",
+ "0 -37.8223306, 144.9521696 0.0 308.0 \n",
+ "1 -37.8194993, 144.9787211 0.0 271.0 \n",
+ "2 -37.8140348, 144.96728 0.0 356.0 \n",
+ "3 -37.8140348, 144.96728 169.0 183.0 \n",
+ "4 -37.8185931, 144.9716404 NaN 341.0 \n",
+ "\n",
+ " MaximumWindDirection MinimumWindSpeed AverageWindSpeed GustWindSpeed \\\n",
+ "0 359.0 0.0 1.9 6.6 \n",
+ "1 359.0 0.0 0.4 2.0 \n",
+ "2 350.0 0.0 0.8 1.9 \n",
+ "3 193.0 1.5 1.7 2.2 \n",
+ "4 NaN NaN 0.2 NaN \n",
+ "\n",
+ " AirTemperature RelativeHumidity AtmosphericPressure PM25 PM10 \\\n",
+ "0 15.6 71.6 1000.100000 1.0 1.0 \n",
+ "1 15.2 78.9 1001.900000 2.0 3.0 \n",
+ "2 15.7 79.0 995.700000 2.0 4.0 \n",
+ "3 9.1 71.8 1018.900000 1.0 3.0 \n",
+ "4 14.6 89.0 1000.900024 6.0 9.0 \n",
+ "\n",
+ " Noise \n",
+ "0 55.600000 \n",
+ "1 57.000000 \n",
+ "2 61.400000 \n",
+ "3 74.700000 \n",
+ "4 47.599998 "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load microclimate sensor data\n",
+ "sensors = pd.read_csv(\"../data/raw/microclimate-sensors-data.csv\")\n",
+ "\n",
+ "print(f\"Shape: {sensors.shape}\")\n",
+ "print(f\"\\nColumn names:\\n{sensors.columns.tolist()}\")\n",
+ "print(f\"\\nData types:\\n{sensors.dtypes}\")\n",
+ "print(f\"\\nFirst 5 rows:\")\n",
+ "sensors.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "8c1390d4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Missing values:\n",
+ "AirTemperature 651\n",
+ "RelativeHumidity 651\n",
+ "LatLong 11483\n",
+ "Time 0\n",
+ "dtype: int64\n",
+ "\n",
+ "Air Temperature:\n",
+ "count 591152.000000\n",
+ "mean 16.296348\n",
+ "std 5.921106\n",
+ "min -0.800000\n",
+ "25% 12.200000\n",
+ "50% 15.700000\n",
+ "75% 19.500000\n",
+ "max 45.400002\n",
+ "Name: AirTemperature, dtype: float64\n",
+ "\n",
+ "Relative Humidity:\n",
+ "count 591152.000000\n",
+ "mean 66.278247\n",
+ "std 18.152886\n",
+ "min 4.000000\n",
+ "25% 54.700001\n",
+ "50% 67.900000\n",
+ "75% 79.300000\n",
+ "max 99.800003\n",
+ "Name: RelativeHumidity, dtype: float64\n",
+ "\n",
+ "Unique sensors: 12\n",
+ "\n",
+ "Sensor locations:\n",
+ "SensorLocation\n",
+ "1 Treasury Place 75821\n",
+ "Birrarung Marr Park - Pole 1131 61388\n",
+ "101 Collins St L11 Rooftop 59430\n",
+ "Tram Stop 7C - Melbourne Tennis Centre Precinct - Rod Laver Arena 59021\n",
+ "Tram Stop 7B - Melbourne Tennis Centre Precinct - Rod Laver Arena 58845\n",
+ "CH1 rooftop 58665\n",
+ "Swanston St - Tram Stop 13 adjacent Federation Sq & Flinders St Station 58237\n",
+ "SkyFarm (Jeff's Shed). Rooftop - Melbourne Conference & Exhibition Centre (MCEC) 53371\n",
+ "Enterprize Park - Pole ID: COM1667 44065\n",
+ "Royal Park Asset ID: COM2707 30688\n",
+ "Batman Park 26129\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Missing values for key columns\n",
+ "print(\"Missing values:\")\n",
+ "print(sensors[['AirTemperature', 'RelativeHumidity', 'LatLong', 'Time']].isnull().sum())\n",
+ "\n",
+ "# Temperature and humidity ranges\n",
+ "print(f\"\\nAir Temperature:\")\n",
+ "print(sensors['AirTemperature'].describe())\n",
+ "print(f\"\\nRelative Humidity:\")\n",
+ "print(sensors['RelativeHumidity'].describe())\n",
+ "\n",
+ "# How many unique sensors\n",
+ "print(f\"\\nUnique sensors: {sensors['Device_id'].nunique()}\")\n",
+ "print(f\"\\nSensor locations:\")\n",
+ "print(sensors['SensorLocation'].value_counts())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c3dd35a0",
+ "metadata": {},
+ "source": [
+ "Dataset 3: BOM (Bureau of Meteorology) Daily Maximum Temperature"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "d3c6419d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape: (4830, 8)\n",
+ "\n",
+ "Column names:\n",
+ "['Product code', 'Bureau of Meteorology station number', 'Year', 'Month', 'Day', 'Maximum temperature (Degree C)', 'Days of accumulation of maximum temperature', 'Quality']\n",
+ "\n",
+ "First 5 rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Product code | \n",
+ " Bureau of Meteorology station number | \n",
+ " Year | \n",
+ " Month | \n",
+ " Day | \n",
+ " Maximum temperature (Degree C) | \n",
+ " Days of accumulation of maximum temperature | \n",
+ " Quality | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " IDCJAC0010 | \n",
+ " 86338 | \n",
+ " 2013 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " IDCJAC0010 | \n",
+ " 86338 | \n",
+ " 2013 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " IDCJAC0010 | \n",
+ " 86338 | \n",
+ " 2013 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " IDCJAC0010 | \n",
+ " 86338 | \n",
+ " 2013 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " IDCJAC0010 | \n",
+ " 86338 | \n",
+ " 2013 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Product code Bureau of Meteorology station number Year Month Day \\\n",
+ "0 IDCJAC0010 86338 2013 1 1 \n",
+ "1 IDCJAC0010 86338 2013 1 2 \n",
+ "2 IDCJAC0010 86338 2013 1 3 \n",
+ "3 IDCJAC0010 86338 2013 1 4 \n",
+ "4 IDCJAC0010 86338 2013 1 5 \n",
+ "\n",
+ " Maximum temperature (Degree C) \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " Days of accumulation of maximum temperature Quality \n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load BOM max temperature data\n",
+ "bom_temp = pd.read_csv(\"../data/raw/IDCJAC0010_086338_1800_Data.csv\")\n",
+ "\n",
+ "print(f\"Shape: {bom_temp.shape}\")\n",
+ "print(f\"\\nColumn names:\\n{bom_temp.columns.tolist()}\")\n",
+ "print(f\"\\nFirst 5 rows:\")\n",
+ "bom_temp.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "76cfb1d8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Missing values:\n",
+ "Product code 0\n",
+ "Bureau of Meteorology station number 0\n",
+ "Year 0\n",
+ "Month 0\n",
+ "Day 0\n",
+ "Maximum temperature (Degree C) 155\n",
+ "Days of accumulation of maximum temperature 155\n",
+ "Quality 156\n",
+ "dtype: int64\n",
+ "\n",
+ "Year range: 2013 to 2026\n",
+ "\n",
+ "Total rows: 4830\n",
+ "Rows with temperature: 4675\n",
+ "\n",
+ "Temperature stats (non-null):\n",
+ "count 4675.000000\n",
+ "mean 20.445348\n",
+ "std 6.067834\n",
+ "min 9.000000\n",
+ "25% 15.800000\n",
+ "50% 19.200000\n",
+ "75% 23.800000\n",
+ "max 43.500000\n",
+ "Name: Maximum temperature (Degree C), dtype: float64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Missing values and date range\n",
+ "print(f\"Missing values:\\n{bom_temp.isnull().sum()}\")\n",
+ "\n",
+ "print(f\"\\nYear range: {bom_temp['Year'].min()} to {bom_temp['Year'].max()}\")\n",
+ "\n",
+ "# How many actual temperature readings\n",
+ "print(f\"\\nTotal rows: {len(bom_temp)}\")\n",
+ "print(f\"Rows with temperature: {bom_temp['Maximum temperature (Degree C)'].notna().sum()}\")\n",
+ "\n",
+ "print(f\"\\nTemperature stats (non-null):\")\n",
+ "print(bom_temp['Maximum temperature (Degree C)'].describe())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7f55c269",
+ "metadata": {},
+ "source": [
+ "Dataset 4: BOM (Bureau of Meteorology) Daily Rainfall"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "5125bf36",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape: (4831, 8)\n",
+ "\n",
+ "Column names:\n",
+ "['Product code', 'Bureau of Meteorology station number', 'Year', 'Month', 'Day', 'Rainfall amount (millimetres)', 'Period over which rainfall was measured (days)', 'Quality']\n",
+ "\n",
+ "Missing values:\n",
+ "Product code 0\n",
+ "Bureau of Meteorology station number 0\n",
+ "Year 0\n",
+ "Month 0\n",
+ "Day 0\n",
+ "Rainfall amount (millimetres) 157\n",
+ "Period over which rainfall was measured (days) 159\n",
+ "Quality 157\n",
+ "dtype: int64\n",
+ "\n",
+ "First 5 rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Product code | \n",
+ " Bureau of Meteorology station number | \n",
+ " Year | \n",
+ " Month | \n",
+ " Day | \n",
+ " Rainfall amount (millimetres) | \n",
+ " Period over which rainfall was measured (days) | \n",
+ " Quality | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " IDCJAC0009 | \n",
+ " 86338 | \n",
+ " 2013 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " IDCJAC0009 | \n",
+ " 86338 | \n",
+ " 2013 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " IDCJAC0009 | \n",
+ " 86338 | \n",
+ " 2013 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " IDCJAC0009 | \n",
+ " 86338 | \n",
+ " 2013 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " IDCJAC0009 | \n",
+ " 86338 | \n",
+ " 2013 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Product code Bureau of Meteorology station number Year Month Day \\\n",
+ "0 IDCJAC0009 86338 2013 1 1 \n",
+ "1 IDCJAC0009 86338 2013 1 2 \n",
+ "2 IDCJAC0009 86338 2013 1 3 \n",
+ "3 IDCJAC0009 86338 2013 1 4 \n",
+ "4 IDCJAC0009 86338 2013 1 5 \n",
+ "\n",
+ " Rainfall amount (millimetres) \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " Period over which rainfall was measured (days) Quality \n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load BOM rainfall data\n",
+ "bom_rain = pd.read_csv(\"../data/raw/IDCJAC0009_086338_1800_Data.csv\")\n",
+ "\n",
+ "print(f\"Shape: {bom_rain.shape}\")\n",
+ "print(f\"\\nColumn names:\\n{bom_rain.columns.tolist()}\")\n",
+ "print(f\"\\nMissing values:\\n{bom_rain.isnull().sum()}\")\n",
+ "print(f\"\\nFirst 5 rows:\")\n",
+ "bom_rain.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cd2fd126",
+ "metadata": {},
+ "source": [
+ "Dataset 5: Soil Sensor Locations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "c77d8ac7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Soil Sensor Locations Shape: (84, 6)\n",
+ "\n",
+ "Column names:\n",
+ "['Site_ID', 'Site_Name', 'Property_Name', 'Latitude', 'Longitude', 'Location']\n",
+ "\n",
+ "First 5 rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Site_ID | \n",
+ " Site_Name | \n",
+ " Property_Name | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ " Location | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 50924 | \n",
+ " Fitzroy West 09 | \n",
+ " Fitzroy Gardens | \n",
+ " -37.810675 | \n",
+ " 144.979618 | \n",
+ " -37.81067469320403, 144.979618148459 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 64971 | \n",
+ " 5th Fairway | \n",
+ " Royal Park | \n",
+ " -37.779210 | \n",
+ " 144.952510 | \n",
+ " -37.77920999998876, 144.95251000000945 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 64973 | \n",
+ " Argyle Square | \n",
+ " Argyle Square | \n",
+ " -37.802902 | \n",
+ " 144.966011 | \n",
+ " -37.8029015228744, 144.966010728849 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 64975 | \n",
+ " McAlister Oval | \n",
+ " Royal Park | \n",
+ " -37.778805 | \n",
+ " 144.956444 | \n",
+ " -37.77880526150242, 144.95644368244822 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 64977 | \n",
+ " Treasury Gardens South | \n",
+ " Treasury Gardens | \n",
+ " -37.814943 | \n",
+ " 144.976201 | \n",
+ " -37.814943422335645, 144.9762009715822 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Site_ID Site_Name Property_Name Latitude Longitude \\\n",
+ "0 50924 Fitzroy West 09 Fitzroy Gardens -37.810675 144.979618 \n",
+ "1 64971 5th Fairway Royal Park -37.779210 144.952510 \n",
+ "2 64973 Argyle Square Argyle Square -37.802902 144.966011 \n",
+ "3 64975 McAlister Oval Royal Park -37.778805 144.956444 \n",
+ "4 64977 Treasury Gardens South Treasury Gardens -37.814943 144.976201 \n",
+ "\n",
+ " Location \n",
+ "0 -37.81067469320403, 144.979618148459 \n",
+ "1 -37.77920999998876, 144.95251000000945 \n",
+ "2 -37.8029015228744, 144.966010728849 \n",
+ "3 -37.77880526150242, 144.95644368244822 \n",
+ "4 -37.814943422335645, 144.9762009715822 "
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load soil sensor locations first (small file)\n",
+ "soil_locations = pd.read_csv(\"../data/raw/soil-sensor-locations.csv\")\n",
+ "\n",
+ "print(f\"Soil Sensor Locations Shape: {soil_locations.shape}\")\n",
+ "print(f\"\\nColumn names:\\n{soil_locations.columns.tolist()}\")\n",
+ "print(f\"\\nFirst 5 rows:\")\n",
+ "soil_locations.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8f4fbd6c",
+ "metadata": {},
+ "source": [
+ "Dataset 5: Soil Sensor Readings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "6e46db1f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape: (1000, 9)\n",
+ "\n",
+ "Column names:\n",
+ "['Local_Time', 'Site_Name', 'Site_ID', 'ID', 'Probe_ID', 'Probe_Measure', 'Soil_Value', 'Unit', 'json_featuretype']\n",
+ "\n",
+ "Data types:\n",
+ "Local_Time object\n",
+ "Site_Name object\n",
+ "Site_ID int64\n",
+ "ID int64\n",
+ "Probe_ID int64\n",
+ "Probe_Measure object\n",
+ "Soil_Value float64\n",
+ "Unit object\n",
+ "json_featuretype object\n",
+ "dtype: object\n",
+ "\n",
+ "First 5 rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Local_Time | \n",
+ " Site_Name | \n",
+ " Site_ID | \n",
+ " ID | \n",
+ " Probe_ID | \n",
+ " Probe_Measure | \n",
+ " Soil_Value | \n",
+ " Unit | \n",
+ " json_featuretype | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2025-04-10T01:00:00+08:00 | \n",
+ " Royal Parade CSIRO | \n",
+ " 88403 | \n",
+ " 26868350 | \n",
+ " 2031673 | \n",
+ " Soil Salinity 20cm | \n",
+ " 0.51 | \n",
+ " µS/cm | \n",
+ " Output | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2025-04-10T02:00:00+08:00 | \n",
+ " Kings Domain South fireyard | \n",
+ " 66199 | \n",
+ " 26868390 | \n",
+ " 1338696 | \n",
+ " Soil Salinity 60cm #0 | \n",
+ " 1.02 | \n",
+ " µS/cm | \n",
+ " Output | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2025-04-09T22:00:00+08:00 | \n",
+ " Princess bridge East p06 | \n",
+ " 101038 | \n",
+ " 26868410 | \n",
+ " 2019364 | \n",
+ " Soil Temperature 20cm | \n",
+ " 16.42 | \n",
+ " ºC | \n",
+ " Output | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2025-04-09T22:00:00+08:00 | \n",
+ " Batman Park | \n",
+ " 75504 | \n",
+ " 26868823 | \n",
+ " 1618930 | \n",
+ " Soil Temperature 60cm #0 | \n",
+ " 16.33 | \n",
+ " ºC | \n",
+ " Output | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2025-04-09T20:00:00+08:00 | \n",
+ " Royal Parade CSIRO | \n",
+ " 88403 | \n",
+ " 26869014 | \n",
+ " 2031672 | \n",
+ " Soil Salinity 10cm | \n",
+ " 0.22 | \n",
+ " µS/cm | \n",
+ " Output | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Local_Time Site_Name Site_ID ID \\\n",
+ "0 2025-04-10T01:00:00+08:00 Royal Parade CSIRO 88403 26868350 \n",
+ "1 2025-04-10T02:00:00+08:00 Kings Domain South fireyard 66199 26868390 \n",
+ "2 2025-04-09T22:00:00+08:00 Princess bridge East p06 101038 26868410 \n",
+ "3 2025-04-09T22:00:00+08:00 Batman Park 75504 26868823 \n",
+ "4 2025-04-09T20:00:00+08:00 Royal Parade CSIRO 88403 26869014 \n",
+ "\n",
+ " Probe_ID Probe_Measure Soil_Value Unit json_featuretype \n",
+ "0 2031673 Soil Salinity 20cm 0.51 µS/cm Output \n",
+ "1 1338696 Soil Salinity 60cm #0 1.02 µS/cm Output \n",
+ "2 2019364 Soil Temperature 20cm 16.42 ºC Output \n",
+ "3 1618930 Soil Temperature 60cm #0 16.33 ºC Output \n",
+ "4 2031672 Soil Salinity 10cm 0.22 µS/cm Output "
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Read only the first 1000 rows: enough to inspect the structure\n",
+ "# without loading the whole historical readings file\n",
+ "soil_sample = pd.read_csv(\n",
+ "    \"../data/raw/soil-sensor-readings-historical-data.csv\",\n",
+ "    nrows=1000,\n",
+ ")\n",
+ "\n",
+ "# Size, column names and inferred dtypes, followed by a preview of the rows\n",
+ "print(\"Shape:\", soil_sample.shape)\n",
+ "print(\"\\nColumn names:\", soil_sample.columns.tolist(), sep=\"\\n\")\n",
+ "print(\"\\nData types:\", soil_sample.dtypes, sep=\"\\n\")\n",
+ "print(\"\\nFirst 5 rows:\")\n",
+ "soil_sample.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "17e7f1f6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Measurement types:\n",
+ "Probe_Measure\n",
+ "Soil Moisture 40cm #0 37\n",
+ "Soil Moisture 30cm #0 32\n",
+ "Temperature 28\n",
+ "Soil Moisture 20cm #0 26\n",
+ "Soil Moisture 10cm #0 26\n",
+ " ..\n",
+ "Soil Moisture 80cm #0 (Sandy Loam) 1\n",
+ "Soil Moisture 70cm #0 (Loam) 1\n",
+ "Soil Salinity 80cm 1\n",
+ "Soil Moisture 50cm #0 (Sand) 1\n",
+ "Soil Moisture 60cm (Adjusted) 1\n",
+ "Name: count, Length: 98, dtype: int64\n",
+ "\n",
+ "Units:\n",
+ "Unit\n",
+ "%VWC 500\n",
+ "ºC 303\n",
+ "µS/cm 197\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# How often each probe measurement label occurs in the sample\n",
+ "print(\"Measurement types:\", soil_sample[\"Probe_Measure\"].value_counts(), sep=\"\\n\")\n",
+ "\n",
+ "# How often each measurement unit occurs in the sample\n",
+ "print(\"\\nUnits:\", soil_sample[\"Unit\"].value_counts(), sep=\"\\n\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Urban Forest (venv)",
+ "language": "python",
+ "name": "urban-forest"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Playground/AidanPage_T126/notebooks/02_data_cleaning.ipynb b/Playground/AidanPage_T126/notebooks/02_data_cleaning.ipynb
new file mode 100644
index 0000000000..c4d7d21c3b
--- /dev/null
+++ b/Playground/AidanPage_T126/notebooks/02_data_cleaning.ipynb
@@ -0,0 +1,1107 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "47819991",
+ "metadata": {},
+ "source": [
+ "Tree Species Data "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "96fe4022",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Trees loaded: (82064, 20)\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "\n",
+ "# Read the urban forest tree inventory (GeoJSON) into `trees`\n",
+ "trees = gpd.read_file(\n",
+ "    \"../data/raw/trees-with-species-and-dimensions-urban-forest.geojson\"\n",
+ ")\n",
+ "\n",
+ "# Print (rows, columns) as a quick sanity check on the load\n",
+ "print(\"Trees loaded:\", trees.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "0d150416",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "DBH before cleaning:\n",
+ " Trees with DBH > 200cm: 54\n",
+ " Trees with DBH > 100cm: 482\n",
+ " Missing DBH: 44863\n",
+ "\n",
+ "DBH after capping outliers:\n",
+ "count 37147.000000\n",
+ "mean 34.354968\n",
+ "std 23.819303\n",
+ "min 1.000000\n",
+ "25% 18.000000\n",
+ "50% 30.000000\n",
+ "75% 45.000000\n",
+ "max 200.000000\n",
+ "Name: diameter_breast_height, dtype: float64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Largest DBH (diameter at breast height, cm) accepted as a plausible reading\n",
+ "DBH_MAX_CM = 200\n",
+ "\n",
+ "# Step 1: Drop unnecessary columns\n",
+ "# errors='ignore' keeps this cell re-runnable: on a second run the columns are\n",
+ "# already gone and a plain drop() would raise KeyError.\n",
+ "trees = trees.drop(\n",
+ "    columns=['uploaddate', 'easting', 'northing', 'geolocation'],\n",
+ "    errors='ignore',\n",
+ ")\n",
+ "\n",
+ "# Step 2: Check DBH outliers before fixing\n",
+ "print(\"DBH before cleaning:\")\n",
+ "print(f\" Trees with DBH > {DBH_MAX_CM}cm: {(trees['diameter_breast_height'] > DBH_MAX_CM).sum()}\")\n",
+ "print(f\" Trees with DBH > 100cm: {(trees['diameter_breast_height'] > 100).sum()}\")\n",
+ "print(f\" Missing DBH: {trees['diameter_breast_height'].isnull().sum()}\")\n",
+ "\n",
+ "# Step 3: Treat DBH readings above DBH_MAX_CM as invalid and set them to missing (NaN).\n",
+ "# This removes the values rather than clipping them to the limit, even though the\n",
+ "# label printed below says 'capping'; the label is kept so it matches the recorded output.\n",
+ "trees.loc[trees['diameter_breast_height'] > DBH_MAX_CM, 'diameter_breast_height'] = float('nan')\n",
+ "\n",
+ "print(f\"\\nDBH after capping outliers:\")\n",
+ "print(trees['diameter_breast_height'].describe())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "99a99888",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Still missing after species median fill: 1946\n",
+ "Filled remaining with overall median: 21.5\n",
+ "\n",
+ "Final DBH stats:\n",
+ "count 82064.000000\n",
+ "mean 29.084885\n",
+ "std 18.490648\n",
+ "min 1.000000\n",
+ "25% 20.000000\n",
+ "50% 21.500000\n",
+ "75% 35.000000\n",
+ "max 200.000000\n",
+ "Name: diameter_breast_height, dtype: float64\n",
+ "\n",
+ "Missing DBH: 0\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n",
+ " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Step 3: Fill missing DBH with species median\n",
+ "# transform('median') uses pandas' built-in grouped median: a species whose DBH values are\n",
+ "# all missing simply yields NaN, without the numpy \"Mean of empty slice\" RuntimeWarning that\n",
+ "# a per-group lambda calling Series.median() emits once per such species.\n",
+ "species_median = trees.groupby('common_name')['diameter_breast_height'].transform('median')\n",
+ "\n",
+ "# Fill only the gaps. Rows that belong to no group (missing common_name) have no species\n",
+ "# median, so an existing DBH on those rows is kept instead of being replaced by the\n",
+ "# group-wise result.\n",
+ "trees['diameter_breast_height'] = trees['diameter_breast_height'].fillna(species_median)\n",
+ "\n",
+ "# Check if any are still missing (no usable species median, e.g. ALL trees of the species lack DBH)\n",
+ "remaining_missing = trees['diameter_breast_height'].isnull().sum()\n",
+ "print(f\"Still missing after species median fill: {remaining_missing}\")\n",
+ "\n",
+ "# Fill any remaining with the overall median\n",
+ "if remaining_missing > 0:\n",
+ "    overall_median = trees['diameter_breast_height'].median()\n",
+ "    trees['diameter_breast_height'] = trees['diameter_breast_height'].fillna(overall_median)\n",
+ "    print(f\"Filled remaining with overall median: {overall_median}\")\n",
+ "\n",
+ "print(f\"\\nFinal DBH stats:\")\n",
+ "print(trees['diameter_breast_height'].describe())\n",
+ "print(f\"\\nMissing DBH: {trees['diameter_breast_height'].isnull().sum()}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "3f11db91",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Risk class distribution:\n",
+ "risk_class\n",
+ "LOW 56003\n",
+ "MEDIUM 21115\n",
+ "HIGH 4946\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "As percentages:\n",
+ "risk_class\n",
+ "LOW 68.2\n",
+ "MEDIUM 25.7\n",
+ "HIGH 6.0\n",
+ "Name: proportion, dtype: float64\n",
+ "\n",
+ "Tree age stats:\n",
+ "count 82064.000000\n",
+ "mean 47.688816\n",
+ "std 54.307899\n",
+ "min 1.000000\n",
+ "25% 8.000000\n",
+ "50% 14.000000\n",
+ "75% 126.000000\n",
+ "max 126.000000\n",
+ "Name: tree_age, dtype: float64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Step 4: Create risk target variable from useful life expectancy\n",
+ "# Lookup from useful-life value to risk label; values absent from the table map to NaN.\n",
+ "ule_to_risk = {10: 'HIGH', 20: 'HIGH', 30: 'MEDIUM', 40: 'LOW', 50: 'LOW'}\n",
+ "trees['risk_class'] = trees['useful_life_expectency_value'].map(ule_to_risk)\n",
+ "\n",
+ "risk_counts = trees['risk_class'].value_counts()\n",
+ "risk_share = trees['risk_class'].value_counts(normalize=True)\n",
+ "\n",
+ "print(\"Risk class distribution:\")\n",
+ "print(risk_counts)\n",
+ "print(f\"\\nAs percentages:\")\n",
+ "print(risk_share.round(3) * 100)\n",
+ "\n",
+ "# Step 5: Calculate tree age\n",
+ "# year_planted arrives as text; anything non-numeric becomes NaN (and so does its age).\n",
+ "# NOTE(review): in the last recorded run the age max equals the 75th percentile (126, i.e.\n",
+ "# year_planted == 1900) - this looks like a placeholder for an unknown planting year; confirm\n",
+ "# before relying on tree_age as a feature.\n",
+ "planted_year = pd.to_numeric(trees['year_planted'], errors='coerce')\n",
+ "trees['year_planted'] = planted_year\n",
+ "trees['tree_age'] = 2026 - planted_year\n",
+ "\n",
+ "print(f\"\\nTree age stats:\")\n",
+ "print(trees['tree_age'].describe())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "f118721a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Cleaned trees saved: (82064, 18)\n",
+ "Columns: ['com_id', 'common_name', 'scientific_name', 'genus', 'family', 'diameter_breast_height', 'year_planted', 'date_planted', 'age_description', 'useful_life_expectency', 'useful_life_expectency_value', 'precinct', 'located_in', 'latitude', 'longitude', 'geometry', 'risk_class', 'tree_age']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Step 6: Save cleaned trees\n",
+ "from pathlib import Path\n",
+ "\n",
+ "# data/ is git-ignored, so on a fresh clone the processed folder has to be created before writing\n",
+ "Path(\"../data/processed\").mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ "trees.to_file(\"../data/processed/trees_cleaned.geojson\", driver=\"GeoJSON\")\n",
+ "print(f\"Cleaned trees saved: {trees.shape}\")\n",
+ "print(f\"Columns: {trees.columns.tolist()}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "45fdfd52",
+ "metadata": {},
+ "source": [
+ "Microclimate Sensor data cleaning"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "0866d247",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape after cleaning: (580320, 19)\n",
+ "\n",
+ "Sample of parsed data:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Time | \n",
+ " date | \n",
+ " lat | \n",
+ " lon | \n",
+ " AirTemperature | \n",
+ " RelativeHumidity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2025-11-16 02:17:20+00:00 | \n",
+ " 2025-11-16 | \n",
+ " -37.822331 | \n",
+ " 144.952170 | \n",
+ " 15.6 | \n",
+ " 71.6 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2025-11-15 16:17:34+00:00 | \n",
+ " 2025-11-15 | \n",
+ " -37.819499 | \n",
+ " 144.978721 | \n",
+ " 15.2 | \n",
+ " 78.9 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2025-11-16 02:14:29+00:00 | \n",
+ " 2025-11-16 | \n",
+ " -37.814035 | \n",
+ " 144.967280 | \n",
+ " 15.7 | \n",
+ " 79.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2025-08-17 15:10:39+00:00 | \n",
+ " 2025-08-17 | \n",
+ " -37.814035 | \n",
+ " 144.967280 | \n",
+ " 9.1 | \n",
+ " 71.8 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2025-11-15 16:23:06+00:00 | \n",
+ " 2025-11-15 | \n",
+ " -37.818593 | \n",
+ " 144.971640 | \n",
+ " 14.6 | \n",
+ " 89.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Time date lat lon \\\n",
+ "0 2025-11-16 02:17:20+00:00 2025-11-16 -37.822331 144.952170 \n",
+ "1 2025-11-15 16:17:34+00:00 2025-11-15 -37.819499 144.978721 \n",
+ "2 2025-11-16 02:14:29+00:00 2025-11-16 -37.814035 144.967280 \n",
+ "3 2025-08-17 15:10:39+00:00 2025-08-17 -37.814035 144.967280 \n",
+ "4 2025-11-15 16:23:06+00:00 2025-11-15 -37.818593 144.971640 \n",
+ "\n",
+ " AirTemperature RelativeHumidity \n",
+ "0 15.6 71.6 \n",
+ "1 15.2 78.9 \n",
+ "2 15.7 79.0 \n",
+ "3 9.1 71.8 \n",
+ "4 14.6 89.0 "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load and clean microclimate sensors\n",
+ "sensors = pd.read_csv(\"../data/raw/microclimate-sensors-data.csv\")\n",
+ "\n",
+ "# Parse timestamp with UTC to handle mixed timezones\n",
+ "sensors['Time'] = pd.to_datetime(sensors['Time'], utc=True)\n",
+ "\n",
+ "# Split LatLong into separate columns\n",
+ "sensors[['lat', 'lon']] = sensors['LatLong'].str.split(',', expand=True).astype(float)\n",
+ "\n",
+ "# Drop rows with no coordinates\n",
+ "sensors = sensors.dropna(subset=['lat', 'lon'])\n",
+ "\n",
+ "# Extract date for daily aggregation.\n",
+ "# 'Time' is UTC, but a daily bucket should follow the Melbourne calendar day: taken straight\n",
+ "# from UTC, a reading at 16:17 UTC on 15 Nov (03:17 on 16 Nov locally) is filed under the\n",
+ "# previous day. Convert to local time first; the 'Time' column itself stays in UTC.\n",
+ "sensors['date'] = sensors['Time'].dt.tz_convert('Australia/Melbourne').dt.date\n",
+ "\n",
+ "print(f\"Shape after cleaning: {sensors.shape}\")\n",
+ "print(f\"\\nSample of parsed data:\")\n",
+ "sensors[['Time', 'date', 'lat', 'lon', 'AirTemperature', 'RelativeHumidity']].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "436df871",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Daily sensor data shape: (6443, 6)\n",
+ "\n",
+ "Sample:\n",
+ "\n",
+ "Saved to data/processed/sensors_daily.csv\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Aggregate to daily averages per sensor\n",
+ "# Named aggregation labels the outputs directly instead of overwriting the column list by position.\n",
+ "sensor_daily = (\n",
+ "    sensors\n",
+ "    .groupby(['SensorLocation', 'lat', 'lon', 'date'])\n",
+ "    .agg(avg_temp=('AirTemperature', 'mean'), avg_humidity=('RelativeHumidity', 'mean'))\n",
+ "    .reset_index()\n",
+ "    .rename(columns={'SensorLocation': 'sensor_location'})\n",
+ ")\n",
+ "\n",
+ "print(f\"Daily sensor data shape: {sensor_daily.shape}\")\n",
+ "print(f\"\\nSample:\")\n",
+ "# A bare expression is only rendered when it is the last line of a cell, so print it explicitly.\n",
+ "print(sensor_daily.head())\n",
+ "\n",
+ "# Save\n",
+ "sensor_daily.to_csv(\"../data/processed/sensors_daily.csv\", index=False)\n",
+ "print(f\"\\nSaved to data/processed/sensors_daily.csv\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3c88befe",
+ "metadata": {},
+ "source": [
+ "BOM weather data cleaning"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "382df919",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Weather shape: (4679, 3)\n",
+ "\n",
+ "Missing values:\n",
+ "date 0\n",
+ "max_temp 0\n",
+ "rainfall_mm 0\n",
+ "dtype: int64\n",
+ "\n",
+ "Date range: 2013-06-01 00:00:00 to 2026-03-24 00:00:00\n",
+ "\n",
+ "Sample:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " date | \n",
+ " max_temp | \n",
+ " rainfall_mm | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 151 | \n",
+ " 2013-06-01 | \n",
+ " 15.8 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 152 | \n",
+ " 2013-06-02 | \n",
+ " 15.7 | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " | 153 | \n",
+ " 2013-06-03 | \n",
+ " 14.8 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ " | 154 | \n",
+ " 2013-06-04 | \n",
+ " 15.0 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ " | 155 | \n",
+ " 2013-06-05 | \n",
+ " 14.6 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " date max_temp rainfall_mm\n",
+ "151 2013-06-01 15.8 0.0\n",
+ "152 2013-06-02 15.7 5.0\n",
+ "153 2013-06-03 14.8 0.2\n",
+ "154 2013-06-04 15.0 0.2\n",
+ "155 2013-06-05 14.6 0.0"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load BOM data\n",
+ "def _load_bom_series(csv_path, source_col, target_col):\n",
+ "    \"\"\"Read one BOM daily CSV and return a two-column frame: 'date' plus the renamed measurement.\"\"\"\n",
+ "    raw = pd.read_csv(csv_path)\n",
+ "    # Year / Month / Day columns are assembled into a single datetime column\n",
+ "    raw['date'] = pd.to_datetime(raw[['Year', 'Month', 'Day']])\n",
+ "    return raw[['date', source_col]].rename(columns={source_col: target_col})\n",
+ "\n",
+ "\n",
+ "bom_temp = _load_bom_series(\n",
+ "    \"../data/raw/IDCJAC0010_086338_1800_Data.csv\",\n",
+ "    'Maximum temperature (Degree C)',\n",
+ "    'max_temp',\n",
+ ")\n",
+ "bom_rain = _load_bom_series(\n",
+ "    \"../data/raw/IDCJAC0009_086338_1800_Data.csv\",\n",
+ "    'Rainfall amount (millimetres)',\n",
+ "    'rainfall_mm',\n",
+ ")\n",
+ "\n",
+ "# Outer-merge on date, then discard days where neither measurement exists\n",
+ "weather = (\n",
+ "    bom_temp\n",
+ "    .merge(bom_rain, on='date', how='outer')\n",
+ "    .dropna(subset=['max_temp', 'rainfall_mm'], how='all')\n",
+ ")\n",
+ "\n",
+ "# Fill missing rainfall with 0 (no recorded rain = no rain)\n",
+ "weather['rainfall_mm'] = weather['rainfall_mm'].fillna(0)\n",
+ "\n",
+ "# Put rows in date order, then linearly bridge temperature gaps of at most 3 consecutive rows\n",
+ "weather = weather.sort_values('date')\n",
+ "weather['max_temp'] = weather['max_temp'].interpolate(method='linear', limit=3)\n",
+ "\n",
+ "print(f\"Weather shape: {weather.shape}\")\n",
+ "print(f\"\\nMissing values:\\n{weather.isnull().sum()}\")\n",
+ "print(f\"\\nDate range: {weather['date'].min()} to {weather['date'].max()}\")\n",
+ "print(f\"\\nSample:\")\n",
+ "weather.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "c1d638a2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Weather saved.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Write the cleaned daily weather table (row index omitted) for the later notebooks\n",
+ "weather_csv = \"../data/processed/weather_cleaned.csv\"\n",
+ "weather.to_csv(weather_csv, index=False)\n",
+ "print(\"Weather saved.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "947aba18",
+ "metadata": {},
+ "source": [
+ "Soil Sensor Data Cleaning "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "2fc3d765",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Daily soil moisture shape: (33115, 5)\n",
+ "\n",
+ "Missing values:\n",
+ "site_id 0\n",
+ "date 0\n",
+ "avg_soil_moisture 0\n",
+ "Latitude 0\n",
+ "Longitude 0\n",
+ "dtype: int64\n",
+ "\n",
+ "Sample:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " site_id | \n",
+ " date | \n",
+ " avg_soil_moisture | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 64970 | \n",
+ " 2023-09-02 | \n",
+ " 33.529375 | \n",
+ " -37.7864 | \n",
+ " 144.96259 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 64970 | \n",
+ " 2023-09-03 | \n",
+ " 33.350313 | \n",
+ " -37.7864 | \n",
+ " 144.96259 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 64970 | \n",
+ " 2023-09-04 | \n",
+ " 33.352187 | \n",
+ " -37.7864 | \n",
+ " 144.96259 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 64970 | \n",
+ " 2023-09-05 | \n",
+ " 33.262500 | \n",
+ " -37.7864 | \n",
+ " 144.96259 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 64970 | \n",
+ " 2023-09-06 | \n",
+ " 33.066875 | \n",
+ " -37.7864 | \n",
+ " 144.96259 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " site_id date avg_soil_moisture Latitude Longitude\n",
+ "0 64970 2023-09-02 33.529375 -37.7864 144.96259\n",
+ "1 64970 2023-09-03 33.350313 -37.7864 144.96259\n",
+ "2 64970 2023-09-04 33.352187 -37.7864 144.96259\n",
+ "3 64970 2023-09-05 33.262500 -37.7864 144.96259\n",
+ "4 64970 2023-09-06 33.066875 -37.7864 144.96259"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Clean soil sensor data - only the moisture readings are kept\n",
+ "soil = pd.read_csv(\"../data/raw/soil-sensor-readings-historical-data.csv\")\n",
+ "soil_locations = pd.read_csv(\"../data/raw/soil-sensor-locations.csv\")\n",
+ "\n",
+ "# Rows whose unit is %VWC are the soil moisture readings\n",
+ "is_moisture = soil['Unit'] == '%VWC'\n",
+ "soil_moisture = soil.loc[is_moisture].copy()\n",
+ "\n",
+ "# Parse timestamp and derive the day used for aggregation\n",
+ "# NOTE(review): the timestamp is normalised to UTC, so 'date' is the UTC calendar date. If the raw\n",
+ "# Local_Time strings carry a local offset, days will be cut at UTC midnight - confirm intent.\n",
+ "soil_moisture['Local_Time'] = pd.to_datetime(soil_moisture['Local_Time'], utc=True)\n",
+ "soil_moisture['date'] = soil_moisture['Local_Time'].dt.date\n",
+ "\n",
+ "# Aggregate to daily average moisture per site\n",
+ "soil_daily = (\n",
+ "    soil_moisture\n",
+ "    .groupby(['Site_ID', 'date'])['Soil_Value']\n",
+ "    .mean()\n",
+ "    .reset_index()\n",
+ ")\n",
+ "soil_daily.columns = ['site_id', 'date', 'avg_soil_moisture']\n",
+ "\n",
+ "# Join to locations for coordinates; the duplicate key column from the right side is dropped\n",
+ "site_coords = soil_locations[['Site_ID', 'Latitude', 'Longitude']]\n",
+ "soil_daily = soil_daily.merge(\n",
+ "    site_coords,\n",
+ "    left_on='site_id',\n",
+ "    right_on='Site_ID',\n",
+ "    how='left'\n",
+ ").drop(columns=['Site_ID'])\n",
+ "\n",
+ "print(f\"Daily soil moisture shape: {soil_daily.shape}\")\n",
+ "print(f\"\\nMissing values:\\n{soil_daily.isnull().sum()}\")\n",
+ "print(f\"\\nSample:\")\n",
+ "soil_daily.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "90ef6b67",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Soil data saved.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Write the daily per-site soil moisture table (row index omitted)\n",
+ "soil_csv = \"../data/processed/soil_daily.csv\"\n",
+ "soil_daily.to_csv(soil_csv, index=False)\n",
+ "print(\"Soil data saved.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1b1a3b93",
+ "metadata": {},
+ "source": [
+ "Coordinate Reference System (CRS)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "1685c8b4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Trees CRS: EPSG:4326\n",
+ "Trees reprojected to: EPSG:7844\n",
+ "Sensors reprojected to: EPSG:7844\n",
+ "Soil reprojected to: EPSG:7844\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Check current CRS of trees\n",
+ "print(f\"Trees CRS: {trees.crs}\")\n",
+ "\n",
+ "# Set project CRS - GDA2020 (EPSG:7844) for Melbourne\n",
+ "trees = trees.to_crs(\"EPSG:7844\")\n",
+ "print(f\"Trees reprojected to: {trees.crs}\")\n",
+ "\n",
+ "# Microclimate sensors: reload the daily table, build point geometry from lon/lat\n",
+ "# (raw GPS coordinates are WGS84), then reproject to the project CRS\n",
+ "sensor_daily = pd.read_csv(\"../data/processed/sensors_daily.csv\")\n",
+ "sensor_points = gpd.points_from_xy(sensor_daily['lon'], sensor_daily['lat'])\n",
+ "sensor_gdf = gpd.GeoDataFrame(sensor_daily, geometry=sensor_points, crs=\"EPSG:4326\")\n",
+ "sensor_gdf = sensor_gdf.to_crs(\"EPSG:7844\")\n",
+ "print(f\"Sensors reprojected to: {sensor_gdf.crs}\")\n",
+ "\n",
+ "# Soil sensors: same treatment using the Longitude/Latitude columns\n",
+ "soil_daily = pd.read_csv(\"../data/processed/soil_daily.csv\")\n",
+ "soil_points = gpd.points_from_xy(soil_daily['Longitude'], soil_daily['Latitude'])\n",
+ "soil_gdf = gpd.GeoDataFrame(soil_daily, geometry=soil_points, crs=\"EPSG:4326\")\n",
+ "soil_gdf = soil_gdf.to_crs(\"EPSG:7844\")\n",
+ "print(f\"Soil reprojected to: {soil_gdf.crs}\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Urban Forest (venv)",
+ "language": "python",
+ "name": "urban-forest"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Playground/AidanPage_T126/notebooks/03_spatial_joins.ipynb b/Playground/AidanPage_T126/notebooks/03_spatial_joins.ipynb
new file mode 100644
index 0000000000..0455fed8e2
--- /dev/null
+++ b/Playground/AidanPage_T126/notebooks/03_spatial_joins.ipynb
@@ -0,0 +1,186 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "b01f3375",
+ "metadata": {},
+ "source": [
+ "Spatial Joins for cleaned data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d5590fb7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Trees: 82064\n",
+ "Unique microclimate sensors: 11\n",
+ "Unique soil sensors: 69\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "\n",
+ "# Load cleaned data\n",
+ "trees = gpd.read_file(\"../data/processed/trees_cleaned.geojson\")\n",
+ "sensor_daily = pd.read_csv(\"../data/processed/sensors_daily.csv\")\n",
+ "soil_daily = pd.read_csv(\"../data/processed/soil_daily.csv\")\n",
+ "weather = pd.read_csv(\"../data/processed/weather_cleaned.csv\")\n",
+ "\n",
+ "# Reproject trees to EPSG:7844\n",
+ "trees = trees.to_crs(\"EPSG:7844\")\n",
+ "\n",
+ "# Get unique sensor locations (one row per sensor not per day); the first row seen for each\n",
+ "# sensor_location supplies its coordinates\n",
+ "sensor_locations = (\n",
+ "    sensor_daily[['sensor_location', 'lat', 'lon']]\n",
+ "    .drop_duplicates(subset=['sensor_location'])\n",
+ ")\n",
+ "sensor_points = gpd.points_from_xy(sensor_locations['lon'], sensor_locations['lat'])\n",
+ "sensor_locations_gdf = gpd.GeoDataFrame(\n",
+ "    sensor_locations, geometry=sensor_points, crs=\"EPSG:4326\"\n",
+ ").to_crs(\"EPSG:7844\")\n",
+ "\n",
+ "# Get unique soil sensor locations (one row per site_id)\n",
+ "soil_locations = (\n",
+ "    soil_daily[['site_id', 'Latitude', 'Longitude']]\n",
+ "    .drop_duplicates(subset=['site_id'])\n",
+ ")\n",
+ "soil_points = gpd.points_from_xy(soil_locations['Longitude'], soil_locations['Latitude'])\n",
+ "soil_locations_gdf = gpd.GeoDataFrame(\n",
+ "    soil_locations, geometry=soil_points, crs=\"EPSG:4326\"\n",
+ ").to_crs(\"EPSG:7844\")\n",
+ "\n",
+ "print(f\"Trees: {trees.shape[0]}\")\n",
+ "print(f\"Unique microclimate sensors: {sensor_locations_gdf.shape[0]}\")\n",
+ "print(f\"Unique soil sensors: {soil_locations_gdf.shape[0]}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "b7378485",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/geopandas/array.py:403: UserWarning: Geometry is in a geographic CRS. Results from 'sjoin_nearest' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
+ "\n",
+ " warnings.warn(\n",
+ "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/geopandas/array.py:403: UserWarning: Geometry is in a geographic CRS. Results from 'sjoin_nearest' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
+ "\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Trees with nearest microclimate sensor: (82064, 21)\n",
+ "\n",
+ "Distance to nearest sensor (metres):\n",
+ "count 82064.000000\n",
+ "mean 0.014189\n",
+ "std 0.010299\n",
+ "min 0.000022\n",
+ "25% 0.006639\n",
+ "50% 0.011951\n",
+ "75% 0.017868\n",
+ "max 0.052376\n",
+ "Name: sensor_distance_m, dtype: float64\n",
+ "\n",
+ "Trees with both sensors: (82064, 23)\n",
+ "\n",
+ "Distance to nearest soil sensor (metres):\n",
+ "count 82064.000000\n",
+ "mean 0.006912\n",
+ "std 0.007596\n",
+ "min 0.000003\n",
+ "25% 0.001781\n",
+ "50% 0.003806\n",
+ "75% 0.008004\n",
+ "max 0.041724\n",
+ "Name: soil_sensor_distance_m, dtype: float64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Nearest-neighbour joins need a projected CRS. EPSG:7844 (GDA2020) is geographic, so running\n",
+ "# sjoin_nearest in it measures distance in degrees (and geopandas warns the result is likely\n",
+ "# incorrect). Do the joins in GDA2020 / MGA zone 55 (EPSG:7855), whose units are metres, so the\n",
+ "# *_distance_m columns really are metres.\n",
+ "METRIC_CRS = \"EPSG:7855\"\n",
+ "\n",
+ "trees_metric = trees.to_crs(METRIC_CRS)\n",
+ "sensors_metric = sensor_locations_gdf[['sensor_location', 'geometry']].to_crs(METRIC_CRS)\n",
+ "soil_metric = soil_locations_gdf[['site_id', 'geometry']].to_crs(METRIC_CRS)\n",
+ "\n",
+ "# Join each tree to its nearest microclimate sensor\n",
+ "trees_with_sensors = gpd.sjoin_nearest(\n",
+ "    trees_metric,\n",
+ "    sensors_metric,\n",
+ "    how='left',\n",
+ "    distance_col='sensor_distance_m'\n",
+ ")\n",
+ "\n",
+ "print(f\"Trees with nearest microclimate sensor: {trees_with_sensors.shape}\")\n",
+ "print(f\"\\nDistance to nearest sensor (metres):\")\n",
+ "print(trees_with_sensors['sensor_distance_m'].describe())\n",
+ "\n",
+ "# Join each tree to its nearest soil sensor\n",
+ "# (index_right from the first join is dropped first so the second join can add its own)\n",
+ "trees_with_sensors = gpd.sjoin_nearest(\n",
+ "    trees_with_sensors.drop(columns=['index_right']),\n",
+ "    soil_metric,\n",
+ "    how='left',\n",
+ "    distance_col='soil_sensor_distance_m'\n",
+ ")\n",
+ "\n",
+ "# The distance columns are plain numbers and stay in metres; return the geometry to the CRS the\n",
+ "# trees came in with so the saved file matches the rest of the project.\n",
+ "trees_with_sensors = trees_with_sensors.to_crs(trees.crs)\n",
+ "\n",
+ "print(f\"\\nTrees with both sensors: {trees_with_sensors.shape}\")\n",
+ "print(f\"\\nDistance to nearest soil sensor (metres):\")\n",
+ "print(trees_with_sensors['soil_sensor_distance_m'].describe())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "bb95212d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Columns:\n",
+ "['com_id', 'common_name', 'scientific_name', 'genus', 'family', 'diameter_breast_height', 'year_planted', 'date_planted', 'age_description', 'useful_life_expectency', 'useful_life_expectency_value', 'precinct', 'located_in', 'latitude', 'longitude', 'risk_class', 'tree_age', 'geometry', 'sensor_location', 'sensor_distance_m', 'index_right', 'site_id', 'soil_sensor_distance_m']\n",
+ "\n",
+ "Saved: (82064, 23)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Show the final column set before writing it out\n",
+ "joined_columns = trees_with_sensors.columns.tolist()\n",
+ "print(\"Columns:\")\n",
+ "print(joined_columns)\n",
+ "\n",
+ "# Save the joined tree data\n",
+ "joined_path = \"../data/processed/trees_with_sensors.geojson\"\n",
+ "trees_with_sensors.to_file(joined_path, driver=\"GeoJSON\")\n",
+ "print(f\"\\nSaved: {trees_with_sensors.shape}\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Urban Forest (venv)",
+ "language": "python",
+ "name": "urban-forest"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Playground/AidanPage_T126/notebooks/04_feature_engineering.ipynb b/Playground/AidanPage_T126/notebooks/04_feature_engineering.ipynb
new file mode 100644
index 0000000000..1b0e6ccf57
--- /dev/null
+++ b/Playground/AidanPage_T126/notebooks/04_feature_engineering.ipynb
@@ -0,0 +1,350 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "763bbfb5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Trees: (82064, 23)\n",
+ "Weather: (4679, 3)\n",
+ "Sensor daily: (6443, 6)\n",
+ "Soil daily: (33115, 5)\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "\n",
+    "# Tree inventory that has already been joined to its sensors (GeoJSON)\n",
+    "trees = gpd.read_file(\"../data/processed/trees_with_sensors.geojson\")\n",
+    "\n",
+    "# Daily weather; parse the date column so it can be sorted chronologically\n",
+    "weather = pd.read_csv(\"../data/processed/weather_cleaned.csv\").assign(\n",
+    "    date=lambda frame: pd.to_datetime(frame['date'])\n",
+    ")\n",
+    "\n",
+    "# Daily aggregates for the microclimate sensors and the soil sensors\n",
+    "sensor_daily = pd.read_csv(\"../data/processed/sensors_daily.csv\")\n",
+    "soil_daily = pd.read_csv(\"../data/processed/soil_daily.csv\")\n",
+    "\n",
+    "# Report the shape of every table that was loaded\n",
+    "loaded_tables = {\n",
+    "    \"Trees\": trees,\n",
+    "    \"Weather\": weather,\n",
+    "    \"Sensor daily\": sensor_daily,\n",
+    "    \"Soil daily\": soil_daily,\n",
+    "}\n",
+    "for table_label, table in loaded_tables.items():\n",
+    "    print(f\"{table_label}: {table.shape}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "2ff007b7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Weather features created:\n",
+ " date max_temp avg_temp_7d days_since_rain consec_hot_days \\\n",
+ "4669 2026-03-15 26.2 24.364286 1 0 \n",
+ "4670 2026-03-16 19.7 22.907143 2 0 \n",
+ "4671 2026-03-17 21.1 21.635714 0 0 \n",
+ "4672 2026-03-18 21.1 21.185714 0 0 \n",
+ "4673 2026-03-19 19.7 21.357143 0 0 \n",
+ "4674 2026-03-20 20.8 21.528571 1 0 \n",
+ "4675 2026-03-21 21.8 21.485714 2 0 \n",
+ "4676 2026-03-22 30.2 22.057143 3 0 \n",
+ "4677 2026-03-23 27.5 23.171429 4 0 \n",
+ "4678 2026-03-24 27.5 24.085714 5 0 \n",
+ "\n",
+ " heatwave_flag \n",
+ "4669 0 \n",
+ "4670 0 \n",
+ "4671 0 \n",
+ "4672 0 \n",
+ "4673 0 \n",
+ "4674 0 \n",
+ "4675 0 \n",
+ "4676 0 \n",
+ "4677 0 \n",
+ "4678 0 \n"
+ ]
+ }
+ ],
+ "source": [
+    "# Weather feature engineering\n",
+    "# Derives rolling-temperature, dry-spell and heatwave columns from the daily\n",
+    "# weather table. Every rolling / run-length feature below is positional, so the\n",
+    "# rows are put in date order first.\n",
+    "weather = weather.sort_values('date')\n",
+    "\n",
+    "# Thresholds used by the heat features (max_temp is in degrees Celsius)\n",
+    "HOT_DAY_THRESHOLD_C = 35\n",
+    "HEATWAVE_MIN_DAYS = 3\n",
+    "\n",
+    "# Rolling temperature averages (NaN until each window is full)\n",
+    "for window_days in (7, 14, 30):\n",
+    "    weather[f'avg_temp_{window_days}d'] = weather['max_temp'].rolling(window_days).mean()\n",
+    "\n",
+    "# Days since last rainfall\n",
+    "# NOTE: a missing rainfall_mm compares False against 0, so it counts as a dry day.\n",
+    "weather['had_rain'] = (weather['rainfall_mm'] > 0).astype(int)\n",
+    "# Label each unbroken run of wet or dry days, then number the days inside a run\n",
+    "# from 1 so the first dry day after rain reads 1. Rain days are reset to 0 on the\n",
+    "# last line; starting the count at 0 would make that first dry day look like a\n",
+    "# rain day.\n",
+    "rain_groups = (weather['had_rain'] != weather['had_rain'].shift()).cumsum()\n",
+    "weather['days_since_rain'] = weather.groupby(rain_groups).cumcount() + 1\n",
+    "weather.loc[weather['had_rain'] == 1, 'days_since_rain'] = 0\n",
+    "\n",
+    "# Heatwave features: hot-day indicator, degrees above the threshold, and the\n",
+    "# 14-day running total of those excess degrees\n",
+    "weather['above_35'] = (weather['max_temp'] >= HOT_DAY_THRESHOLD_C).astype(int)\n",
+    "weather['heat_degrees'] = (weather['max_temp'] - HOT_DAY_THRESHOLD_C).clip(lower=0)\n",
+    "weather['heat_degree_days_14d'] = weather['heat_degrees'].rolling(14).sum()\n",
+    "\n",
+    "# Consecutive hot days: same run-length counting, zeroed on days below the threshold\n",
+    "hot_groups = (weather['above_35'] != weather['above_35'].shift()).cumsum()\n",
+    "weather['consec_hot_days'] = weather.groupby(hot_groups).cumcount() + 1\n",
+    "weather.loc[weather['above_35'] == 0, 'consec_hot_days'] = 0\n",
+    "weather['heatwave_flag'] = (weather['consec_hot_days'] >= HEATWAVE_MIN_DAYS).astype(int)\n",
+    "\n",
+    "print(\"Weather features created:\")\n",
+    "print(weather[['date', 'max_temp', 'avg_temp_7d', 'days_since_rain', 'consec_hot_days', 'heatwave_flag']].tail(10))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "166c8dae",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Trees after sensor merge: (82064, 25)\n",
+ "\n",
+ "Missing sensor values:\n",
+ "sensor_avg_temp 0\n",
+ "sensor_avg_humidity 0\n",
+ "dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+    "# Collapse the daily sensor readings to one mean per sensor location\n",
+    "sensor_summary = sensor_daily.groupby('sensor_location', as_index=False).agg(\n",
+    "    sensor_avg_temp=('avg_temp', 'mean'),\n",
+    "    sensor_avg_humidity=('avg_humidity', 'mean'),\n",
+    ")\n",
+    "\n",
+    "# Attach the per-location means to each tree through its sensor_location key\n",
+    "trees = trees.merge(sensor_summary, how='left', on='sensor_location')\n",
+    "\n",
+    "# Count trees that did not pick up a sensor value from the left join\n",
+    "sensor_feature_cols = ['sensor_avg_temp', 'sensor_avg_humidity']\n",
+    "missing_sensor_counts = trees[sensor_feature_cols].isnull().sum()\n",
+    "print(f\"Trees after sensor merge: {trees.shape}\")\n",
+    "print(\"\\nMissing sensor values:\")\n",
+    "print(missing_sensor_counts)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "cc18a967",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Trees after soil merge: (82064, 26)\n",
+ "\n",
+ "Missing soil values:\n",
+ "avg_soil_moisture 0\n",
+ "dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+    "# Reduce the daily soil readings to a single mean moisture value per site\n",
+    "soil_summary = (\n",
+    "    soil_daily\n",
+    "    .groupby('site_id', as_index=False)['avg_soil_moisture']\n",
+    "    .mean()\n",
+    ")\n",
+    "\n",
+    "# Attach the per-site mean to each tree through its site_id key\n",
+    "trees = trees.merge(soil_summary, how='left', on='site_id')\n",
+    "\n",
+    "# Count trees that did not pick up a soil value from the left join\n",
+    "missing_soil_counts = trees[['avg_soil_moisture']].isnull().sum()\n",
+    "print(f\"Trees after soil merge: {trees.shape}\")\n",
+    "print(\"\\nMissing soil values:\")\n",
+    "print(missing_soil_counts)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "70ab116f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Trees with all features: (82064, 33)\n",
+ "\n",
+ "Weather snapshot date: 2026-03-24 00:00:00\n",
+ "Max temp: 27.5°C\n",
+ "7-day avg: 24.1°C\n",
+ "Days since rain: 5\n",
+ "Heatwave: No\n"
+ ]
+ }
+ ],
+ "source": [
+    "# Most recent day on which every weather column (rolling windows included)\n",
+    "# is populated\n",
+    "complete_weather = weather.dropna()\n",
+    "if complete_weather.empty:\n",
+    "    # Without this guard an empty selection would be broadcast as all-NaN\n",
+    "    # weather features before failing later inside a format specifier.\n",
+    "    raise ValueError(\"No weather row has all features populated\")\n",
+    "latest_weather = complete_weather.iloc[-1]\n",
+    "\n",
+    "# Add weather features to all trees (same weather applies city-wide).\n",
+    "# Maps each new tree column to the weather column it is copied from.\n",
+    "weather_feature_sources = {\n",
+    "    'max_temp_latest': 'max_temp',\n",
+    "    'avg_temp_7d': 'avg_temp_7d',\n",
+    "    'avg_temp_14d': 'avg_temp_14d',\n",
+    "    'avg_temp_30d': 'avg_temp_30d',\n",
+    "    'days_since_rain': 'days_since_rain',\n",
+    "    'heat_degree_days_14d': 'heat_degree_days_14d',\n",
+    "    'heatwave_flag': 'heatwave_flag',\n",
+    "}\n",
+    "for tree_col, weather_col in weather_feature_sources.items():\n",
+    "    trees[tree_col] = latest_weather[weather_col]\n",
+    "\n",
+    "print(f\"Trees with all features: {trees.shape}\")\n",
+    "print(f\"\\nWeather snapshot date: {latest_weather['date']}\")\n",
+    "print(f\"Max temp: {latest_weather['max_temp']}°C\")\n",
+    "print(f\"7-day avg: {latest_weather['avg_temp_7d']:.1f}°C\")\n",
+    "print(f\"Days since rain: {int(latest_weather['days_since_rain'])}\")\n",
+    "print(f\"Heatwave: {'Yes' if latest_weather['heatwave_flag'] else 'No'}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "db389631",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Feature table shape: (82064, 24)\n",
+ "\n",
+ "Missing values:\n",
+ "com_id 0\n",
+ "common_name 0\n",
+ "scientific_name 0\n",
+ "genus 0\n",
+ "family 0\n",
+ "diameter_breast_height 0\n",
+ "year_planted 0\n",
+ "tree_age 0\n",
+ "age_description 0\n",
+ "latitude 0\n",
+ "longitude 0\n",
+ "precinct 0\n",
+ "sensor_avg_temp 0\n",
+ "sensor_avg_humidity 0\n",
+ "avg_soil_moisture 0\n",
+ "max_temp_latest 0\n",
+ "avg_temp_7d 0\n",
+ "avg_temp_14d 0\n",
+ "avg_temp_30d 0\n",
+ "days_since_rain 0\n",
+ "heat_degree_days_14d 0\n",
+ "heatwave_flag 0\n",
+ "useful_life_expectency_value 0\n",
+ "risk_class 0\n",
+ "dtype: int64\n",
+ "\n",
+ "Target distribution:\n",
+ "risk_class\n",
+ "LOW 56003\n",
+ "MEDIUM 21115\n",
+ "HIGH 4946\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+    "# Build the final feature table from named column groups\n",
+    "id_cols = ['com_id']\n",
+    "tree_cols = [\n",
+    "    'common_name', 'scientific_name', 'genus', 'family',\n",
+    "    'diameter_breast_height', 'year_planted', 'tree_age',\n",
+    "    'age_description',\n",
+    "]\n",
+    "location_cols = ['latitude', 'longitude', 'precinct']\n",
+    "sensor_cols = ['sensor_avg_temp', 'sensor_avg_humidity']\n",
+    "soil_cols = ['avg_soil_moisture']\n",
+    "weather_cols = [\n",
+    "    'max_temp_latest', 'avg_temp_7d', 'avg_temp_14d', 'avg_temp_30d',\n",
+    "    'days_since_rain', 'heat_degree_days_14d', 'heatwave_flag',\n",
+    "]\n",
+    "target_cols = ['useful_life_expectency_value', 'risk_class']\n",
+    "\n",
+    "# Column order: ID, tree traits, location, sensor, soil, weather, then targets\n",
+    "selected_cols = (\n",
+    "    id_cols + tree_cols + location_cols + sensor_cols\n",
+    "    + soil_cols + weather_cols + target_cols\n",
+    ")\n",
+    "feature_table = trees[selected_cols].copy()\n",
+    "\n",
+    "print(f\"Feature table shape: {feature_table.shape}\")\n",
+    "print(f\"\\nMissing values:\\n{feature_table.isnull().sum()}\")\n",
+    "print(f\"\\nTarget distribution:\\n{feature_table['risk_class'].value_counts()}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "3a47f277",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Feature table saved!\n",
+ "\n",
+ "Final summary:\n",
+ " Rows: 82064\n",
+ " Features: 22 (excluding target columns)\n",
+ " Target classes: {'LOW': 56003, 'MEDIUM': 21115, 'HIGH': 4946}\n"
+ ]
+ }
+ ],
+ "source": [
+    "# Persist the feature table as CSV (row index is not written)\n",
+    "feature_table.to_csv(\"../data/processed/feature_table.csv\", index=False)\n",
+    "print(\"Feature table saved!\")\n",
+    "\n",
+    "# The two target columns are left out of the reported feature count\n",
+    "row_count, column_count = feature_table.shape\n",
+    "class_counts = feature_table['risk_class'].value_counts().to_dict()\n",
+    "print(\"\\nFinal summary:\")\n",
+    "print(f\"  Rows: {row_count}\")\n",
+    "print(f\"  Features: {column_count - 2} (excluding target columns)\")\n",
+    "print(f\"  Target classes: {class_counts}\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Urban Forest (venv)",
+ "language": "python",
+ "name": "urban-forest"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Playground/AidanPage_T126/notebooks/05_ml_model.ipynb b/Playground/AidanPage_T126/notebooks/05_ml_model.ipynb
new file mode 100644
index 0000000000..e593694fcc
--- /dev/null
+++ b/Playground/AidanPage_T126/notebooks/05_ml_model.ipynb
@@ -0,0 +1,279 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "0ff06e1f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape: (82064, 24)\n",
+ "\n",
+ "Target distribution:\n",
+ "risk_class\n",
+ "LOW 56003\n",
+ "MEDIUM 21115\n",
+ "HIGH 4946\n",
+ "Name: count, dtype: int64\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " com_id | \n",
+ " common_name | \n",
+ " scientific_name | \n",
+ " genus | \n",
+ " family | \n",
+ " diameter_breast_height | \n",
+ " year_planted | \n",
+ " tree_age | \n",
+ " age_description | \n",
+ " latitude | \n",
+ " ... | \n",
+ " avg_soil_moisture | \n",
+ " max_temp_latest | \n",
+ " avg_temp_7d | \n",
+ " avg_temp_14d | \n",
+ " avg_temp_30d | \n",
+ " days_since_rain | \n",
+ " heat_degree_days_14d | \n",
+ " heatwave_flag | \n",
+ " useful_life_expectency_value | \n",
+ " risk_class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1070378 | \n",
+ " Tulip Tree | \n",
+ " Liriodendron tulipifera | \n",
+ " Liriodendron | \n",
+ " Magnoliaceae | \n",
+ " 20.0 | \n",
+ " 2006 | \n",
+ " 20 | \n",
+ " Mature | \n",
+ " -37.832567 | \n",
+ " ... | \n",
+ " 39.305003 | \n",
+ " 27.5 | \n",
+ " 24.085714 | \n",
+ " 22.860714 | \n",
+ " 24.305 | \n",
+ " 5 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 50 | \n",
+ " LOW | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1070382 | \n",
+ " Tulip Tree | \n",
+ " Liriodendron tulipifera | \n",
+ " Liriodendron | \n",
+ " Magnoliaceae | \n",
+ " 21.0 | \n",
+ " 2006 | \n",
+ " 20 | \n",
+ " Mature | \n",
+ " -37.831669 | \n",
+ " ... | \n",
+ " 39.305003 | \n",
+ " 27.5 | \n",
+ " 24.085714 | \n",
+ " 22.860714 | \n",
+ " 24.305 | \n",
+ " 5 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 50 | \n",
+ " LOW | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1796650 | \n",
+ " Cook pine | \n",
+ " Araucaria columnaris | \n",
+ " Araucaria | \n",
+ " Araucariaceae | \n",
+ " 50.0 | \n",
+ " 2020 | \n",
+ " 6 | \n",
+ " Semi-mature | \n",
+ " -37.802222 | \n",
+ " ... | \n",
+ " 29.021206 | \n",
+ " 27.5 | \n",
+ " 24.085714 | \n",
+ " 22.860714 | \n",
+ " 24.305 | \n",
+ " 5 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 30 | \n",
+ " MEDIUM | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1457913 | \n",
+ " Yellow Box | \n",
+ " Eucalyptus melliodora | \n",
+ " Eucalyptus | \n",
+ " Myrtaceae | \n",
+ " 25.0 | \n",
+ " 2010 | \n",
+ " 16 | \n",
+ " Mature | \n",
+ " -37.797537 | \n",
+ " ... | \n",
+ " 17.674442 | \n",
+ " 27.5 | \n",
+ " 24.085714 | \n",
+ " 22.860714 | \n",
+ " 24.305 | \n",
+ " 5 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 50 | \n",
+ " LOW | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1457915 | \n",
+ " Yellow Box | \n",
+ " Eucalyptus melliodora | \n",
+ " Eucalyptus | \n",
+ " Myrtaceae | \n",
+ " 22.0 | \n",
+ " 2010 | \n",
+ " 16 | \n",
+ " Mature | \n",
+ " -37.797540 | \n",
+ " ... | \n",
+ " 17.674442 | \n",
+ " 27.5 | \n",
+ " 24.085714 | \n",
+ " 22.860714 | \n",
+ " 24.305 | \n",
+ " 5 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 50 | \n",
+ " LOW | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 24 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " com_id common_name scientific_name genus family \\\n",
+ "0 1070378 Tulip Tree Liriodendron tulipifera Liriodendron Magnoliaceae \n",
+ "1 1070382 Tulip Tree Liriodendron tulipifera Liriodendron Magnoliaceae \n",
+ "2 1796650 Cook pine Araucaria columnaris Araucaria Araucariaceae \n",
+ "3 1457913 Yellow Box Eucalyptus melliodora Eucalyptus Myrtaceae \n",
+ "4 1457915 Yellow Box Eucalyptus melliodora Eucalyptus Myrtaceae \n",
+ "\n",
+ " diameter_breast_height year_planted tree_age age_description latitude \\\n",
+ "0 20.0 2006 20 Mature -37.832567 \n",
+ "1 21.0 2006 20 Mature -37.831669 \n",
+ "2 50.0 2020 6 Semi-mature -37.802222 \n",
+ "3 25.0 2010 16 Mature -37.797537 \n",
+ "4 22.0 2010 16 Mature -37.797540 \n",
+ "\n",
+ " ... avg_soil_moisture max_temp_latest avg_temp_7d avg_temp_14d \\\n",
+ "0 ... 39.305003 27.5 24.085714 22.860714 \n",
+ "1 ... 39.305003 27.5 24.085714 22.860714 \n",
+ "2 ... 29.021206 27.5 24.085714 22.860714 \n",
+ "3 ... 17.674442 27.5 24.085714 22.860714 \n",
+ "4 ... 17.674442 27.5 24.085714 22.860714 \n",
+ "\n",
+ " avg_temp_30d days_since_rain heat_degree_days_14d heatwave_flag \\\n",
+ "0 24.305 5 0.0 0 \n",
+ "1 24.305 5 0.0 0 \n",
+ "2 24.305 5 0.0 0 \n",
+ "3 24.305 5 0.0 0 \n",
+ "4 24.305 5 0.0 0 \n",
+ "\n",
+ " useful_life_expectency_value risk_class \n",
+ "0 50 LOW \n",
+ "1 50 LOW \n",
+ "2 30 MEDIUM \n",
+ "3 50 LOW \n",
+ "4 50 LOW \n",
+ "\n",
+ "[5 rows x 24 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from sklearn.model_selection import train_test_split, cross_val_score\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "from sklearn.metrics import classification_report, confusion_matrix\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')\n",
+ "\n",
+    "# Read the engineered feature table from the processed data folder\n",
+    "df = pd.read_csv(\"../data/processed/feature_table.csv\")\n",
+    "\n",
+    "# Summarise the table size and the class balance of the target\n",
+    "class_counts = df['risk_class'].value_counts()\n",
+    "print(f\"Shape: {df.shape}\")\n",
+    "print(\"\\nTarget distribution:\")\n",
+    "print(class_counts)\n",
+    "\n",
+    "# Preview the first rows as the cell's rich output\n",
+    "df.head()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Urban Forest (venv)",
+ "language": "python",
+ "name": "urban-forest"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Playground/AidanPage_T126/requirements.txt b/Playground/AidanPage_T126/requirements.txt
new file mode 100644
index 0000000000..507ba864bb
--- /dev/null
+++ b/Playground/AidanPage_T126/requirements.txt
@@ -0,0 +1,120 @@
+anyio==4.12.1
+appnope==0.1.4
+argon2-cffi==25.1.0
+argon2-cffi-bindings==25.1.0
+arrow==1.4.0
+asttokens==3.0.1
+async-lru==2.0.5
+attrs==25.4.0
+babel==2.18.0
+beautifulsoup4==4.14.3
+bleach==6.2.0
+branca==0.8.2
+certifi==2026.2.25
+cffi==2.0.0
+charset-normalizer==3.4.6
+comm==0.2.3
+contourpy==1.3.0
+cycler==0.12.1
+debugpy==1.8.20
+decorator==5.2.1
+defusedxml==0.7.1
+exceptiongroup==1.3.1
+executing==2.2.1
+fastjsonschema==2.21.2
+folium==0.20.0
+fonttools==4.60.2
+fqdn==1.5.1
+geopandas==1.0.1
+h11==0.16.0
+httpcore==1.0.9
+httpx==0.28.1
+idna==3.11
+importlib_metadata==8.7.1
+importlib_resources==6.5.2
+ipykernel==6.31.0
+ipython==8.18.1
+ipywidgets==8.1.8
+isoduration==20.11.0
+jedi==0.19.2
+Jinja2==3.1.6
+json5==0.13.0
+jsonpointer==3.0.0
+jsonschema==4.25.1
+jsonschema-specifications==2025.9.1
+jupyter==1.1.1
+jupyter-console==6.6.3
+jupyter-events==0.12.0
+jupyter-lsp==2.3.0
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyter_server==2.17.0
+jupyter_server_terminals==0.5.4
+jupyterlab==4.5.6
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.28.0
+jupyterlab_widgets==3.0.16
+kiwisolver==1.4.7
+lark==1.3.1
+MarkupSafe==3.0.3
+matplotlib==3.9.4
+matplotlib-inline==0.2.1
+mistune==3.2.0
+nbclient==0.10.2
+nbconvert==7.17.0
+nbformat==5.10.4
+nest-asyncio==1.6.0
+notebook==7.5.5
+notebook_shim==0.2.4
+numpy==2.0.2
+overrides==7.7.0
+packaging==26.0
+pandas==2.3.3
+pandocfilters==1.5.1
+parso==0.8.6
+pexpect==4.9.0
+pillow==11.3.0
+platformdirs==4.4.0
+prometheus_client==0.24.1
+prompt_toolkit==3.0.52
+psutil==7.2.2
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.23
+Pygments==2.19.2
+pyogrio==0.11.1
+pyparsing==3.3.2
+pyproj==3.6.1
+python-dateutil==2.9.0.post0
+python-json-logger==4.0.0
+pytz==2026.1.post1
+PyYAML==6.0.3
+pyzmq==27.1.0
+referencing==0.36.2
+requests==2.32.5
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rfc3987-syntax==1.1.0
+rpds-py==0.27.1
+seaborn==0.13.2
+Send2Trash==2.1.0
+shapely==2.0.7
+six==1.17.0
+soupsieve==2.8.3
+stack-data==0.6.3
+terminado==0.18.1
+tinycss2==1.4.0
+tomli==2.4.0
+tornado==6.5.5
+traitlets==5.14.3
+typing_extensions==4.15.0
+tzdata==2025.3
+uri-template==1.3.0
+urllib3==2.6.3
+wcwidth==0.6.0
+webcolors==24.11.1
+webencodings==0.5.1
+websocket-client==1.9.0
+widgetsnbextension==4.0.15
+xyzservices==2025.11.0
+zipp==3.23.0