diff --git a/Playground/AidanPage_T126/.gitignore b/Playground/AidanPage_T126/.gitignore new file mode 100644 index 0000000000..f45af34b8b --- /dev/null +++ b/Playground/AidanPage_T126/.gitignore @@ -0,0 +1,2 @@ +venv/ +data/ diff --git a/Playground/AidanPage_T126/notebooks/01_data_exploration.ipynb b/Playground/AidanPage_T126/notebooks/01_data_exploration.ipynb new file mode 100644 index 0000000000..254cf5cb9d --- /dev/null +++ b/Playground/AidanPage_T126/notebooks/01_data_exploration.ipynb @@ -0,0 +1,1595 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "27d2046c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pandas: 2.3.3\n", + "geopandas: 1.0.1\n", + "Setup complete!\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "print(\"pandas:\", pd.__version__)\n", + "print(\"geopandas:\", gpd.__version__)\n", + "print(\"Setup complete!\")" + ] + }, + { + "cell_type": "markdown", + "id": "b71d78ad", + "metadata": {}, + "source": [ + "Dataset 1: Urban Forest Tree Species " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3f3bbc31", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape: (82064, 20)\n", + "\n", + "Column names:\n", + "['com_id', 'common_name', 'scientific_name', 'genus', 'family', 'diameter_breast_height', 'year_planted', 'date_planted', 'age_description', 'useful_life_expectency', 'useful_life_expectency_value', 'precinct', 'located_in', 'uploaddate', 'latitude', 'longitude', 'easting', 'northing', 'geolocation', 'geometry']\n", + "\n", + "Data types:\n", + "com_id object\n", + "common_name object\n", + "scientific_name object\n", + "genus object\n", + "family object\n", + "diameter_breast_height float64\n", + "year_planted object\n", + "date_planted datetime64[ms]\n", + "age_description object\n", + 
"useful_life_expectency object\n", + "useful_life_expectency_value int32\n", + "precinct object\n", + "located_in object\n", + "uploaddate object\n", + "latitude float64\n", + "longitude float64\n", + "easting object\n", + "northing object\n", + "geolocation object\n", + "geometry geometry\n", + "dtype: object\n", + "\n", + "First 5 rows:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
com_idcommon_namescientific_namegenusfamilydiameter_breast_heightyear_planteddate_plantedage_descriptionuseful_life_expectencyuseful_life_expectency_valueprecinctlocated_inuploaddatelatitudelongitudeeastingnorthinggeolocationgeometry
01070378Tulip TreeLiriodendron tulipiferaLiriodendronMagnoliaceae20.020062006-12-15Mature> 41 years50South YarraStreetNone-37.832567144.986879322843.145810852.35{ \"lon\": 144.98687896999999, \"lat\": -37.832567...POINT (144.98688 -37.83257)
11070382Tulip TreeLiriodendron tulipiferaLiriodendronMagnoliaceae21.020062006-12-15Mature> 41 years50South YarraStreetNone-37.831669144.987059322856.85810952.41{ \"lon\": 144.98705856999999, \"lat\": -37.831668...POINT (144.98706 -37.83167)
21796650Cook pineAraucaria columnarisAraucariaAraucariaceaeNaN20202020-12-14Semi-mature21 - 30 years30CarltonParkNone-37.802222144.962852320655.275814177.03{ \"lon\": 144.96285247, \"lat\": -37.80222191 }POINT (144.96285 -37.80222)
31457913Yellow BoxEucalyptus melliodoraEucalyptusMyrtaceae25.020102010-12-14Mature> 41 years50KensingtonParkNone-37.797537144.923519317180.375814617.41{ \"lon\": 144.92351884999999, \"lat\": -37.797537...POINT (144.92352 -37.79754)
41457915Yellow BoxEucalyptus melliodoraEucalyptusMyrtaceae22.020102010-12-14Mature> 41 years50KensingtonParkNone-37.797540144.923459317175.135814617.01{ \"lon\": 144.92345922000001, \"lat\": -37.797539...POINT (144.92346 -37.79754)
\n", + "
" + ], + "text/plain": [ + " com_id common_name scientific_name genus family \\\n", + "0 1070378 Tulip Tree Liriodendron tulipifera Liriodendron Magnoliaceae \n", + "1 1070382 Tulip Tree Liriodendron tulipifera Liriodendron Magnoliaceae \n", + "2 1796650 Cook pine Araucaria columnaris Araucaria Araucariaceae \n", + "3 1457913 Yellow Box Eucalyptus melliodora Eucalyptus Myrtaceae \n", + "4 1457915 Yellow Box Eucalyptus melliodora Eucalyptus Myrtaceae \n", + "\n", + " diameter_breast_height year_planted date_planted age_description \\\n", + "0 20.0 2006 2006-12-15 Mature \n", + "1 21.0 2006 2006-12-15 Mature \n", + "2 NaN 2020 2020-12-14 Semi-mature \n", + "3 25.0 2010 2010-12-14 Mature \n", + "4 22.0 2010 2010-12-14 Mature \n", + "\n", + " useful_life_expectency useful_life_expectency_value precinct \\\n", + "0 > 41 years 50 South Yarra \n", + "1 > 41 years 50 South Yarra \n", + "2 21 - 30 years 30 Carlton \n", + "3 > 41 years 50 Kensington \n", + "4 > 41 years 50 Kensington \n", + "\n", + " located_in uploaddate latitude longitude easting northing \\\n", + "0 Street None -37.832567 144.986879 322843.14 5810852.35 \n", + "1 Street None -37.831669 144.987059 322856.8 5810952.41 \n", + "2 Park None -37.802222 144.962852 320655.27 5814177.03 \n", + "3 Park None -37.797537 144.923519 317180.37 5814617.41 \n", + "4 Park None -37.797540 144.923459 317175.13 5814617.01 \n", + "\n", + " geolocation \\\n", + "0 { \"lon\": 144.98687896999999, \"lat\": -37.832567... \n", + "1 { \"lon\": 144.98705856999999, \"lat\": -37.831668... \n", + "2 { \"lon\": 144.96285247, \"lat\": -37.80222191 } \n", + "3 { \"lon\": 144.92351884999999, \"lat\": -37.797537... \n", + "4 { \"lon\": 144.92345922000001, \"lat\": -37.797539... 
\n", + "\n", + " geometry \n", + "0 POINT (144.98688 -37.83257) \n", + "1 POINT (144.98706 -37.83167) \n", + "2 POINT (144.96285 -37.80222) \n", + "3 POINT (144.92352 -37.79754) \n", + "4 POINT (144.92346 -37.79754) " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Load Data (Urban Forest Tree Dataset)\n", + "trees = gpd.read_file(\"../data/raw/trees-with-species-and-dimensions-urban-forest.geojson\")\n", + "\n", + "# Basic shape and info\n", + "print(f\"Shape: {trees.shape}\")\n", + "print(f\"\\nColumn names:\\n{trees.columns.tolist()}\")\n", + "print(f\"\\nData types:\\n{trees.dtypes}\")\n", + "print(f\"\\nFirst 5 rows:\")\n", + "trees.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a43a9882", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Missing values:\n", + "com_id 0\n", + "common_name 0\n", + "scientific_name 0\n", + "genus 0\n", + "family 0\n", + "diameter_breast_height 44863\n", + "year_planted 0\n", + "date_planted 0\n", + "age_description 0\n", + "useful_life_expectency 17410\n", + "useful_life_expectency_value 0\n", + "precinct 0\n", + "located_in 0\n", + "uploaddate 82064\n", + "latitude 0\n", + "longitude 0\n", + "easting 0\n", + "northing 0\n", + "geolocation 0\n", + "geometry 0\n", + "dtype: int64\n", + "\n", + "Missing value percentages:\n", + "com_id 0.0\n", + "common_name 0.0\n", + "scientific_name 0.0\n", + "genus 0.0\n", + "family 0.0\n", + "diameter_breast_height 54.7\n", + "year_planted 0.0\n", + "date_planted 0.0\n", + "age_description 0.0\n", + "useful_life_expectency 21.2\n", + "useful_life_expectency_value 0.0\n", + "precinct 0.0\n", + "located_in 0.0\n", + "uploaddate 100.0\n", + "latitude 0.0\n", + "longitude 0.0\n", + "easting 0.0\n", + "northing 0.0\n", + "geolocation 0.0\n", + "geometry 0.0\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "# Missing values\n", + "print(f\"Missing 
values:\\n{trees.isnull().sum()}\")\n", + "print(f\"\\nMissing value percentages:\\n{(trees.isnull().sum() / len(trees) * 100).round(1)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "0db23ad6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Useful Life Expectancy:\n", + "useful_life_expectency\n", + "> 41 years 22604\n", + "21 - 30 years 21115\n", + "31 - 40 years 15989\n", + "11 - 20 years 4283\n", + "< 10 years 663\n", + "Name: count, dtype: int64\n", + "\n", + "Age Description:\n", + "age_description\n", + "Mature 51090\n", + "Semi-mature 21699\n", + "Unestablished 9260\n", + "Planter box 15\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "# Key categorical distributions\n", + "print(\"Useful Life Expectancy:\")\n", + "print(trees['useful_life_expectency'].value_counts())\n", + "print(f\"\\nAge Description:\")\n", + "print(trees['age_description'].value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "16973a58", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Useful Life Expectancy Value:\n", + "count 82064.000000\n", + "mean 41.016767\n", + "std 10.079900\n", + "min 10.000000\n", + "25% 30.000000\n", + "50% 40.000000\n", + "75% 50.000000\n", + "max 50.000000\n", + "Name: useful_life_expectency_value, dtype: float64\n", + "\n", + "Value counts:\n", + "useful_life_expectency_value\n", + "10 663\n", + "20 4283\n", + "30 21115\n", + "40 15989\n", + "50 40014\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "print(\"Useful Life Expectancy Value:\")\n", + "print(trees['useful_life_expectency_value'].describe())\n", + "print(f\"\\nValue counts:\\n{trees['useful_life_expectency_value'].value_counts().sort_index()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c4f02ac2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Diameter Breast Height (non-null):\n", + "count 37201.000000\n", + "mean 35.471627\n", + "std 137.639141\n", + "min 1.000000\n", + "25% 18.000000\n", + "50% 30.000000\n", + "75% 45.000000\n", + "max 26027.000000\n", + "Name: diameter_breast_height, dtype: float64\n", + "\n", + "Top 10 species:\n", + "common_name\n", + "River red gum 8338\n", + "London Plane 5070\n", + "Drooping sheoak 3452\n", + "English Elm 3291\n", + "Yellow Box 3043\n", + "Black Wattle 3001\n", + "Spotted Gum 2902\n", + "River Sheoak 1665\n", + "Lightwood Wattle 1619\n", + "Sweet Bursaria 1574\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "# DBH distribution (for non-missing values)\n", + "print(\"Diameter Breast Height (non-null):\")\n", + "print(trees['diameter_breast_height'].describe())\n", + "\n", + "print(f\"\\nTop 10 species:\")\n", + "print(trees['common_name'].value_counts().head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "01137143", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Latitude range: -37.8505 to -37.7755\n", + "Longitude range: 144.9004 to 144.9911\n", + "\n", + "Precinct distribution:\n", + "precinct\n", + "Parkville 28567\n", + "Melbourne 9063\n", + "Kensington 7555\n", + "West Melbourne 6756\n", + "Docklands 5816\n", + "North Melbourne 4715\n", + "East Melbourne 4506\n", + "Carlton 4368\n", + "South Yarra 2631\n", + "Port Melbourne 2623\n", + "Southbank 2338\n", + "Carlton North 2323\n", + "Flemington 510\n", + "North And West Melbourne 113\n", + "Princes Hill 65\n", + "Fishermans Bend 61\n", + "Central City 26\n", + "Brunswick West 24\n", + "Richmond 2\n", + "Brunswick 2\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "# Quick spatial check\n", + "print(f\"Latitude range: {trees['latitude'].min():.4f} to {trees['latitude'].max():.4f}\")\n", + "print(f\"Longitude range: {trees['longitude'].min():.4f} to 
{trees['longitude'].max():.4f}\")\n", + "print(f\"\\nPrecinct distribution:\")\n", + "print(trees['precinct'].value_counts())" + ] + }, + { + "cell_type": "markdown", + "id": "6ee84c19", + "metadata": {}, + "source": [ + "Dataset 2: Microclimate Sensor Data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "b3f25ab4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape: (591803, 16)\n", + "\n", + "Column names:\n", + "['Device_id', 'Time', 'SensorLocation', 'LatLong', 'MinimumWindDirection', 'AverageWindDirection', 'MaximumWindDirection', 'MinimumWindSpeed', 'AverageWindSpeed', 'GustWindSpeed', 'AirTemperature', 'RelativeHumidity', 'AtmosphericPressure', 'PM25', 'PM10', 'Noise']\n", + "\n", + "Data types:\n", + "Device_id object\n", + "Time object\n", + "SensorLocation object\n", + "LatLong object\n", + "MinimumWindDirection float64\n", + "AverageWindDirection float64\n", + "MaximumWindDirection float64\n", + "MinimumWindSpeed float64\n", + "AverageWindSpeed float64\n", + "GustWindSpeed float64\n", + "AirTemperature float64\n", + "RelativeHumidity float64\n", + "AtmosphericPressure float64\n", + "PM25 float64\n", + "PM10 float64\n", + "Noise float64\n", + "dtype: object\n", + "\n", + "First 5 rows:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Device_idTimeSensorLocationLatLongMinimumWindDirectionAverageWindDirectionMaximumWindDirectionMinimumWindSpeedAverageWindSpeedGustWindSpeedAirTemperatureRelativeHumidityAtmosphericPressurePM25PM10Noise
0ICTMicroclimate-092025-11-16T13:17:20+11:00SkyFarm (Jeff's Shed). Rooftop - Melbourne Con...-37.8223306, 144.95216960.0308.0359.00.01.96.615.671.61000.1000001.01.055.600000
1ICTMicroclimate-062025-11-16T03:17:34+11:00Tram Stop 7B - Melbourne Tennis Centre Precinc...-37.8194993, 144.97872110.0271.0359.00.00.42.015.278.91001.9000002.03.057.000000
2ICTMicroclimate-032025-11-16T13:14:29+11:00CH1 rooftop-37.8140348, 144.967280.0356.0350.00.00.81.915.779.0995.7000002.04.061.400000
3ICTMicroclimate-032025-08-18T01:10:39+10:00CH1 rooftop-37.8140348, 144.96728169.0183.0193.01.51.72.29.171.81018.9000001.03.074.700000
4ICTMicroclimate-012025-11-16T03:23:06+11:00Birrarung Marr Park - Pole 1131-37.8185931, 144.9716404NaN341.0NaNNaN0.2NaN14.689.01000.9000246.09.047.599998
\n", + "
" + ], + "text/plain": [ + " Device_id Time \\\n", + "0 ICTMicroclimate-09 2025-11-16T13:17:20+11:00 \n", + "1 ICTMicroclimate-06 2025-11-16T03:17:34+11:00 \n", + "2 ICTMicroclimate-03 2025-11-16T13:14:29+11:00 \n", + "3 ICTMicroclimate-03 2025-08-18T01:10:39+10:00 \n", + "4 ICTMicroclimate-01 2025-11-16T03:23:06+11:00 \n", + "\n", + " SensorLocation \\\n", + "0 SkyFarm (Jeff's Shed). Rooftop - Melbourne Con... \n", + "1 Tram Stop 7B - Melbourne Tennis Centre Precinc... \n", + "2 CH1 rooftop \n", + "3 CH1 rooftop \n", + "4 Birrarung Marr Park - Pole 1131 \n", + "\n", + " LatLong MinimumWindDirection AverageWindDirection \\\n", + "0 -37.8223306, 144.9521696 0.0 308.0 \n", + "1 -37.8194993, 144.9787211 0.0 271.0 \n", + "2 -37.8140348, 144.96728 0.0 356.0 \n", + "3 -37.8140348, 144.96728 169.0 183.0 \n", + "4 -37.8185931, 144.9716404 NaN 341.0 \n", + "\n", + " MaximumWindDirection MinimumWindSpeed AverageWindSpeed GustWindSpeed \\\n", + "0 359.0 0.0 1.9 6.6 \n", + "1 359.0 0.0 0.4 2.0 \n", + "2 350.0 0.0 0.8 1.9 \n", + "3 193.0 1.5 1.7 2.2 \n", + "4 NaN NaN 0.2 NaN \n", + "\n", + " AirTemperature RelativeHumidity AtmosphericPressure PM25 PM10 \\\n", + "0 15.6 71.6 1000.100000 1.0 1.0 \n", + "1 15.2 78.9 1001.900000 2.0 3.0 \n", + "2 15.7 79.0 995.700000 2.0 4.0 \n", + "3 9.1 71.8 1018.900000 1.0 3.0 \n", + "4 14.6 89.0 1000.900024 6.0 9.0 \n", + "\n", + " Noise \n", + "0 55.600000 \n", + "1 57.000000 \n", + "2 61.400000 \n", + "3 74.700000 \n", + "4 47.599998 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load microclimate sensor data\n", + "sensors = pd.read_csv(\"../data/raw/microclimate-sensors-data.csv\")\n", + "\n", + "print(f\"Shape: {sensors.shape}\")\n", + "print(f\"\\nColumn names:\\n{sensors.columns.tolist()}\")\n", + "print(f\"\\nData types:\\n{sensors.dtypes}\")\n", + "print(f\"\\nFirst 5 rows:\")\n", + "sensors.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": 
"8c1390d4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Missing values:\n", + "AirTemperature 651\n", + "RelativeHumidity 651\n", + "LatLong 11483\n", + "Time 0\n", + "dtype: int64\n", + "\n", + "Air Temperature:\n", + "count 591152.000000\n", + "mean 16.296348\n", + "std 5.921106\n", + "min -0.800000\n", + "25% 12.200000\n", + "50% 15.700000\n", + "75% 19.500000\n", + "max 45.400002\n", + "Name: AirTemperature, dtype: float64\n", + "\n", + "Relative Humidity:\n", + "count 591152.000000\n", + "mean 66.278247\n", + "std 18.152886\n", + "min 4.000000\n", + "25% 54.700001\n", + "50% 67.900000\n", + "75% 79.300000\n", + "max 99.800003\n", + "Name: RelativeHumidity, dtype: float64\n", + "\n", + "Unique sensors: 12\n", + "\n", + "Sensor locations:\n", + "SensorLocation\n", + "1 Treasury Place 75821\n", + "Birrarung Marr Park - Pole 1131 61388\n", + "101 Collins St L11 Rooftop 59430\n", + "Tram Stop 7C - Melbourne Tennis Centre Precinct - Rod Laver Arena 59021\n", + "Tram Stop 7B - Melbourne Tennis Centre Precinct - Rod Laver Arena 58845\n", + "CH1 rooftop 58665\n", + "Swanston St - Tram Stop 13 adjacent Federation Sq & Flinders St Station 58237\n", + "SkyFarm (Jeff's Shed). 
Rooftop - Melbourne Conference & Exhibition Centre (MCEC) 53371\n", + "Enterprize Park - Pole ID: COM1667 44065\n", + "Royal Park Asset ID: COM2707 30688\n", + "Batman Park 26129\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "# Missing values for key columns\n", + "print(\"Missing values:\")\n", + "print(sensors[['AirTemperature', 'RelativeHumidity', 'LatLong', 'Time']].isnull().sum())\n", + "\n", + "# Temperature and humidity ranges\n", + "print(f\"\\nAir Temperature:\")\n", + "print(sensors['AirTemperature'].describe())\n", + "print(f\"\\nRelative Humidity:\")\n", + "print(sensors['RelativeHumidity'].describe())\n", + "\n", + "# How many unique sensors\n", + "print(f\"\\nUnique sensors: {sensors['Device_id'].nunique()}\")\n", + "print(f\"\\nSensor locations:\")\n", + "print(sensors['SensorLocation'].value_counts())" + ] + }, + { + "cell_type": "markdown", + "id": "c3dd35a0", + "metadata": {}, + "source": [ + "Dataset 3: BOM Temperature " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d3c6419d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape: (4830, 8)\n", + "\n", + "Column names:\n", + "['Product code', 'Bureau of Meteorology station number', 'Year', 'Month', 'Day', 'Maximum temperature (Degree C)', 'Days of accumulation of maximum temperature', 'Quality']\n", + "\n", + "First 5 rows:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Product codeBureau of Meteorology station numberYearMonthDayMaximum temperature (Degree C)Days of accumulation of maximum temperatureQuality
0IDCJAC001086338201311NaNNaNNaN
1IDCJAC001086338201312NaNNaNNaN
2IDCJAC001086338201313NaNNaNNaN
3IDCJAC001086338201314NaNNaNNaN
4IDCJAC001086338201315NaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " Product code Bureau of Meteorology station number Year Month Day \\\n", + "0 IDCJAC0010 86338 2013 1 1 \n", + "1 IDCJAC0010 86338 2013 1 2 \n", + "2 IDCJAC0010 86338 2013 1 3 \n", + "3 IDCJAC0010 86338 2013 1 4 \n", + "4 IDCJAC0010 86338 2013 1 5 \n", + "\n", + " Maximum temperature (Degree C) \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + " Days of accumulation of maximum temperature Quality \n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load BOM max temperature data\n", + "bom_temp = pd.read_csv(\"../data/raw/IDCJAC0010_086338_1800_Data.csv\")\n", + "\n", + "print(f\"Shape: {bom_temp.shape}\")\n", + "print(f\"\\nColumn names:\\n{bom_temp.columns.tolist()}\")\n", + "print(f\"\\nFirst 5 rows:\")\n", + "bom_temp.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76cfb1d8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Missing values:\n", + "Product code 0\n", + "Bureau of Meteorology station number 0\n", + "Year 0\n", + "Month 0\n", + "Day 0\n", + "Maximum temperature (Degree C) 155\n", + "Days of accumulation of maximum temperature 155\n", + "Quality 156\n", + "dtype: int64\n", + "\n", + "Year range: 2013 to 2026\n", + "\n", + "Total rows: 4830\n", + "Rows with temperature: 4675\n", + "\n", + "Temperature stats (non-null):\n", + "count 4675.000000\n", + "mean 20.445348\n", + "std 6.067834\n", + "min 9.000000\n", + "25% 15.800000\n", + "50% 19.200000\n", + "75% 23.800000\n", + "max 43.500000\n", + "Name: Maximum temperature (Degree C), dtype: float64\n" + ] + } + ], + "source": [ + "# Missing values and date range\n", + "print(f\"Missing values:\\n{bom_temp.isnull().sum()}\")\n", + "\n", + "print(f\"\\nYear range: {bom_temp['Year'].min()} to 
{bom_temp['Year'].max()}\")\n", + "\n", + "# How many actual temperature readings\n", + "print(f\"\\nTotal rows: {len(bom_temp)}\")\n", + "print(f\"Rows with temperature: {bom_temp['Maximum temperature (Degree C)'].notna().sum()}\")\n", + "\n", + "print(f\"\\nTemperature stats (non-null):\")\n", + "print(bom_temp['Maximum temperature (Degree C)'].describe())" + ] + }, + { + "cell_type": "markdown", + "id": "7f55c269", + "metadata": {}, + "source": [ + "Dataset 4: BOM Rainfall" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5125bf36", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape: (4831, 8)\n", + "\n", + "Column names:\n", + "['Product code', 'Bureau of Meteorology station number', 'Year', 'Month', 'Day', 'Rainfall amount (millimetres)', 'Period over which rainfall was measured (days)', 'Quality']\n", + "\n", + "Missing values:\n", + "Product code 0\n", + "Bureau of Meteorology station number 0\n", + "Year 0\n", + "Month 0\n", + "Day 0\n", + "Rainfall amount (millimetres) 157\n", + "Period over which rainfall was measured (days) 159\n", + "Quality 157\n", + "dtype: int64\n", + "\n", + "First 5 rows:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Product codeBureau of Meteorology station numberYearMonthDayRainfall amount (millimetres)Period over which rainfall was measured (days)Quality
0IDCJAC000986338201311NaNNaNNaN
1IDCJAC000986338201312NaNNaNNaN
2IDCJAC000986338201313NaNNaNNaN
3IDCJAC000986338201314NaNNaNNaN
4IDCJAC000986338201315NaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " Product code Bureau of Meteorology station number Year Month Day \\\n", + "0 IDCJAC0009 86338 2013 1 1 \n", + "1 IDCJAC0009 86338 2013 1 2 \n", + "2 IDCJAC0009 86338 2013 1 3 \n", + "3 IDCJAC0009 86338 2013 1 4 \n", + "4 IDCJAC0009 86338 2013 1 5 \n", + "\n", + " Rainfall amount (millimetres) \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + " Period over which rainfall was measured (days) Quality \n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load BOM rainfall data\n", + "bom_rain = pd.read_csv(\"../data/raw/IDCJAC0009_086338_1800_Data.csv\")\n", + "\n", + "print(f\"Shape: {bom_rain.shape}\")\n", + "print(f\"\\nColumn names:\\n{bom_rain.columns.tolist()}\")\n", + "print(f\"\\nMissing values:\\n{bom_rain.isnull().sum()}\")\n", + "print(f\"\\nFirst 5 rows:\")\n", + "bom_rain.head()" + ] + }, + { + "cell_type": "markdown", + "id": "cd2fd126", + "metadata": {}, + "source": [ + "Dataset 4: Soil Sensor Locations" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c77d8ac7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Soil Sensor Locations Shape: (84, 6)\n", + "\n", + "Column names:\n", + "['Site_ID', 'Site_Name', 'Property_Name', 'Latitude', 'Longitude', 'Location']\n", + "\n", + "First 5 rows:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Site_IDSite_NameProperty_NameLatitudeLongitudeLocation
050924Fitzroy West 09Fitzroy Gardens-37.810675144.979618-37.81067469320403, 144.979618148459
1649715th FairwayRoyal Park-37.779210144.952510-37.77920999998876, 144.95251000000945
264973Argyle SquareArgyle Square-37.802902144.966011-37.8029015228744, 144.966010728849
364975McAlister OvalRoyal Park-37.778805144.956444-37.77880526150242, 144.95644368244822
464977Treasury Gardens SouthTreasury Gardens-37.814943144.976201-37.814943422335645, 144.9762009715822
\n", + "
" + ], + "text/plain": [ + " Site_ID Site_Name Property_Name Latitude Longitude \\\n", + "0 50924 Fitzroy West 09 Fitzroy Gardens -37.810675 144.979618 \n", + "1 64971 5th Fairway Royal Park -37.779210 144.952510 \n", + "2 64973 Argyle Square Argyle Square -37.802902 144.966011 \n", + "3 64975 McAlister Oval Royal Park -37.778805 144.956444 \n", + "4 64977 Treasury Gardens South Treasury Gardens -37.814943 144.976201 \n", + "\n", + " Location \n", + "0 -37.81067469320403, 144.979618148459 \n", + "1 -37.77920999998876, 144.95251000000945 \n", + "2 -37.8029015228744, 144.966010728849 \n", + "3 -37.77880526150242, 144.95644368244822 \n", + "4 -37.814943422335645, 144.9762009715822 " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load soil sensor locations first (small file)\n", + "soil_locations = pd.read_csv(\"../data/raw/soil-sensor-locations.csv\")\n", + "\n", + "print(f\"Soil Sensor Locations Shape: {soil_locations.shape}\")\n", + "print(f\"\\nColumn names:\\n{soil_locations.columns.tolist()}\")\n", + "print(f\"\\nFirst 5 rows:\")\n", + "soil_locations.head()" + ] + }, + { + "cell_type": "markdown", + "id": "8f4fbd6c", + "metadata": {}, + "source": [ + "Dataset 5: Soil Sensor Readings" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "6e46db1f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape: (1000, 9)\n", + "\n", + "Column names:\n", + "['Local_Time', 'Site_Name', 'Site_ID', 'ID', 'Probe_ID', 'Probe_Measure', 'Soil_Value', 'Unit', 'json_featuretype']\n", + "\n", + "Data types:\n", + "Local_Time object\n", + "Site_Name object\n", + "Site_ID int64\n", + "ID int64\n", + "Probe_ID int64\n", + "Probe_Measure object\n", + "Soil_Value float64\n", + "Unit object\n", + "json_featuretype object\n", + "dtype: object\n", + "\n", + "First 5 rows:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Local_TimeSite_NameSite_IDIDProbe_IDProbe_MeasureSoil_ValueUnitjson_featuretype
02025-04-10T01:00:00+08:00Royal Parade CSIRO88403268683502031673Soil Salinity 20cm0.51µS/cmOutput
12025-04-10T02:00:00+08:00Kings Domain South fireyard66199268683901338696Soil Salinity 60cm #01.02µS/cmOutput
22025-04-09T22:00:00+08:00Princess bridge East p06101038268684102019364Soil Temperature 20cm16.42ºCOutput
32025-04-09T22:00:00+08:00Batman Park75504268688231618930Soil Temperature 60cm #016.33ºCOutput
42025-04-09T20:00:00+08:00Royal Parade CSIRO88403268690142031672Soil Salinity 10cm0.22µS/cmOutput
\n", + "
" + ], + "text/plain": [ + " Local_Time Site_Name Site_ID ID \\\n", + "0 2025-04-10T01:00:00+08:00 Royal Parade CSIRO 88403 26868350 \n", + "1 2025-04-10T02:00:00+08:00 Kings Domain South fireyard 66199 26868390 \n", + "2 2025-04-09T22:00:00+08:00 Princess bridge East p06 101038 26868410 \n", + "3 2025-04-09T22:00:00+08:00 Batman Park 75504 26868823 \n", + "4 2025-04-09T20:00:00+08:00 Royal Parade CSIRO 88403 26869014 \n", + "\n", + " Probe_ID Probe_Measure Soil_Value Unit json_featuretype \n", + "0 2031673 Soil Salinity 20cm 0.51 µS/cm Output \n", + "1 1338696 Soil Salinity 60cm #0 1.02 µS/cm Output \n", + "2 2019364 Soil Temperature 20cm 16.42 ºC Output \n", + "3 1618930 Soil Temperature 60cm #0 16.33 ºC Output \n", + "4 2031672 Soil Salinity 10cm 0.22 µS/cm Output " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load just the first 1000 rows to check structure\n", + "soil_sample = pd.read_csv(\"../data/raw/soil-sensor-readings-historical-data.csv\", nrows=1000)\n", + "\n", + "print(f\"Shape: {soil_sample.shape}\")\n", + "print(f\"\\nColumn names:\\n{soil_sample.columns.tolist()}\")\n", + "print(f\"\\nData types:\\n{soil_sample.dtypes}\")\n", + "print(f\"\\nFirst 5 rows:\")\n", + "soil_sample.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "17e7f1f6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Measurement types:\n", + "Probe_Measure\n", + "Soil Moisture 40cm #0 37\n", + "Soil Moisture 30cm #0 32\n", + "Temperature 28\n", + "Soil Moisture 20cm #0 26\n", + "Soil Moisture 10cm #0 26\n", + " ..\n", + "Soil Moisture 80cm #0 (Sandy Loam) 1\n", + "Soil Moisture 70cm #0 (Loam) 1\n", + "Soil Salinity 80cm 1\n", + "Soil Moisture 50cm #0 (Sand) 1\n", + "Soil Moisture 60cm (Adjusted) 1\n", + "Name: count, Length: 98, dtype: int64\n", + "\n", + "Units:\n", + "Unit\n", + "%VWC 500\n", + "ºC 303\n", + "µS/cm 197\n", + "Name: 
count, dtype: int64\n" + ] + } + ], + "source": [ + "print(\"Measurement types:\")\n", + "print(soil_sample['Probe_Measure'].value_counts())\n", + "\n", + "print(f\"\\nUnits:\")\n", + "print(soil_sample['Unit'].value_counts())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Urban Forest (venv)", + "language": "python", + "name": "urban-forest" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Playground/AidanPage_T126/notebooks/02_data_cleaning.ipynb b/Playground/AidanPage_T126/notebooks/02_data_cleaning.ipynb new file mode 100644 index 0000000000..c4d7d21c3b --- /dev/null +++ b/Playground/AidanPage_T126/notebooks/02_data_cleaning.ipynb @@ -0,0 +1,1107 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "47819991", + "metadata": {}, + "source": [ + "Tree Species Data " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "96fe4022", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Trees loaded: (82064, 20)\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "# Load trees\n", + "trees = gpd.read_file(\"../data/raw/trees-with-species-and-dimensions-urban-forest.geojson\")\n", + "\n", + "print(f\"Trees loaded: {trees.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0d150416", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DBH before cleaning:\n", + " Trees with DBH > 200cm: 54\n", + " Trees with DBH > 100cm: 482\n", + " Missing DBH: 44863\n", + "\n", + "DBH after capping outliers:\n", + "count 37147.000000\n", + "mean 
34.354968\n", + "std 23.819303\n", + "min 1.000000\n", + "25% 18.000000\n", + "50% 30.000000\n", + "75% 45.000000\n", + "max 200.000000\n", + "Name: diameter_breast_height, dtype: float64\n" + ] + } + ], + "source": [ + "# Step 1: Drop unnecessary columns\n", + "trees = trees.drop(columns=['uploaddate', 'easting', 'northing', 'geolocation'])\n", + "\n", + "# Step 2: Check DBH outliers before fixing\n", + "print(\"DBH before cleaning:\")\n", + "print(f\" Trees with DBH > 200cm: {(trees['diameter_breast_height'] > 200).sum()}\")\n", + "print(f\" Trees with DBH > 100cm: {(trees['diameter_breast_height'] > 100).sum()}\")\n", + "print(f\" Missing DBH: {trees['diameter_breast_height'].isnull().sum()}\")\n", + "\n", + "# Treat DBH values above 200cm as implausible: set them to missing (they are NOT clipped to 200)\n", + "trees.loc[trees['diameter_breast_height'] > 200, 'diameter_breast_height'] = None\n", + "\n", + "print(f\"\\nDBH after capping outliers:\")\n", + "print(trees['diameter_breast_height'].describe())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "99a99888", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Still missing after species median fill: 1946\n", + "Filled remaining with overall median: 21.5\n", + "\n", + "Final DBH stats:\n", + "count 82064.000000\n", + "mean 29.084885\n", + "std 18.490648\n", + "min 1.000000\n", + "25% 20.000000\n", + "50% 21.500000\n", + "75% 35.000000\n", + "max 200.000000\n", + "Name: diameter_breast_height, dtype: float64\n", + "\n", + "Missing DBH: 0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of 
empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return 
np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, 
keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
"/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + "/Users/aidanpage/Documents/GitHub/MOP-Code/Playground/AidanPage_T126/venv/lib/python3.9/site-packages/numpy/lib/_nanfunctions_impl.py:1231: RuntimeWarning: Mean of empty slice\n", + " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", + 
# %% Step 3: Fill missing DBH with species median
DBH_COL = 'diameter_breast_height'

# Median DBH of each species, broadcast back onto every row of `trees`.
# The built-in 'median' aggregation replaces the previous Python lambda, which
# produced one NumPy "Mean of empty slice" RuntimeWarning per species whose DBH
# values were all missing. Filling through fillna() also guarantees an observed
# DBH is never overwritten: a row without a usable species median simply keeps
# its own value.
# NOTE(review): with the old lambda-based transform, rows whose common_name is
# null appear to come back as NaN on recent pandas — confirm against the raw data.
species_median = trees.groupby('common_name')[DBH_COL].transform('median')
trees[DBH_COL] = trees[DBH_COL].fillna(species_median)

# Check if any are still missing (species where ALL trees had missing DBH)
remaining_missing = trees[DBH_COL].isnull().sum()
print(f"Still missing after species median fill: {remaining_missing}")

# Fill any remaining with the overall median
if remaining_missing > 0:
    overall_median = trees[DBH_COL].median()
    trees[DBH_COL] = trees[DBH_COL].fillna(overall_median)
    print(f"Filled remaining with overall median: {overall_median}")

print(f"\nFinal DBH stats:")
print(trees[DBH_COL].describe())
print(f"\nMissing DBH: {trees[DBH_COL].isnull().sum()}")

# %% Step 4: Create risk target variable from useful life expectancy
# Maps the numeric useful-life-expectancy value to a three-level risk label.
RISK_BY_LIFE_EXPECTANCY = {
    10: 'HIGH',
    20: 'HIGH',
    30: 'MEDIUM',
    40: 'LOW',
    50: 'LOW',
}
trees['risk_class'] = trees['useful_life_expectency_value'].map(RISK_BY_LIFE_EXPECTANCY)

# .map() yields NaN for any value missing from the dictionary; surface that
# instead of letting unlabeled rows slip silently into the target column.
unmapped_risk = trees['risk_class'].isnull().sum()
if unmapped_risk > 0:
    print(f"WARNING: {unmapped_risk} trees have a life-expectancy value outside the risk mapping")

print("Risk class distribution:")
print(trees['risk_class'].value_counts())
print(f"\nAs percentages:")
print(trees['risk_class'].value_counts(normalize=True).round(3) * 100)

# %% Step 5: Calculate tree age
# Year that ages are measured against; kept as a named constant so the
# analysis is reproducible and the value is easy to find and update.
REFERENCE_YEAR = 2026

trees['year_planted'] = pd.to_numeric(trees['year_planted'], errors='coerce')
trees['tree_age'] = REFERENCE_YEAR - trees['year_planted']
# NOTE(review): the recorded output has both the 75th percentile and the max of
# tree_age at 126 (year_planted == 1900), which looks like a placeholder year —
# confirm before relying on tree_age as a feature.

print(f"\nTree age stats:")
print(trees['tree_age'].describe())

# %% Step 6: Save cleaned trees
trees.to_file("../data/processed/trees_cleaned.geojson", driver="GeoJSON")
print(f"Cleaned trees saved: {trees.shape}")
print(f"Columns: {trees.columns.tolist()}")
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimedatelatlonAirTemperatureRelativeHumidity
02025-11-16 02:17:20+00:002025-11-16-37.822331144.95217015.671.6
12025-11-15 16:17:34+00:002025-11-15-37.819499144.97872115.278.9
22025-11-16 02:14:29+00:002025-11-16-37.814035144.96728015.779.0
32025-08-17 15:10:39+00:002025-08-17-37.814035144.9672809.171.8
42025-11-15 16:23:06+00:002025-11-15-37.818593144.97164014.689.0
\n", + "
# %% Load and clean microclimate sensors
# Time zone used to decide which calendar day a reading belongs to.
LOCAL_TZ = 'Australia/Melbourne'

sensors = pd.read_csv("../data/raw/microclimate-sensors-data.csv")

# Parse timestamp with UTC to handle mixed timezones
sensors['Time'] = pd.to_datetime(sensors['Time'], utc=True)

# Split LatLong ("lat, lon" text) into separate numeric columns
sensors[['lat', 'lon']] = sensors['LatLong'].str.split(',', expand=True).astype(float)

# Drop rows with no coordinates; .copy() so the column assignment below
# writes to an independent frame rather than a slice of the original.
sensors = sensors.dropna(subset=['lat', 'lon']).copy()

# Extract the LOCAL calendar date for daily aggregation. Taking .dt.date
# directly on the UTC timestamps would split each Melbourne day across two
# UTC dates (e.g. 16:17 UTC is already the next local day), so the daily means
# would not line up with the local-date weather observations.
sensors['date'] = sensors['Time'].dt.tz_convert(LOCAL_TZ).dt.date

print(f"Shape after cleaning: {sensors.shape}")
print(f"\nSample of parsed data:")
print(sensors[['Time', 'date', 'lat', 'lon', 'AirTemperature', 'RelativeHumidity']].head())

# %% Aggregate to daily averages per sensor
# One row per (sensor, coordinates, local date) holding the mean air
# temperature and mean relative humidity of that day's readings.
sensor_daily = (
    sensors
    .groupby(['SensorLocation', 'lat', 'lon', 'date'], as_index=False)
    .agg(
        avg_temp=('AirTemperature', 'mean'),
        avg_humidity=('RelativeHumidity', 'mean'),
    )
    .rename(columns={'SensorLocation': 'sensor_location'})
)

print(f"Daily sensor data shape: {sensor_daily.shape}")
print(f"\nSample:")
# A bare `sensor_daily.head()` in the middle of a cell displays nothing,
# so print it explicitly.
print(sensor_daily.head())

# Save
sensor_daily.to_csv("../data/processed/sensors_daily.csv", index=False)
print(f"\nSaved to data/processed/sensors_daily.csv")
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datemax_temprainfall_mm
1512013-06-0115.80.0
1522013-06-0215.75.0
1532013-06-0314.80.2
1542013-06-0415.00.2
1552013-06-0514.60.0
\n", + "
# %% Load BOM data
bom_temp = pd.read_csv("../data/raw/IDCJAC0010_086338_1800_Data.csv")
bom_rain = pd.read_csv("../data/raw/IDCJAC0009_086338_1800_Data.csv")

# Create date column for both (assembled from the Year / Month / Day columns)
bom_temp['date'] = pd.to_datetime(bom_temp[['Year', 'Month', 'Day']])
bom_rain['date'] = pd.to_datetime(bom_rain[['Year', 'Month', 'Day']])

# Keep only what's needed
bom_temp = bom_temp[['date', 'Maximum temperature (Degree C)']].rename(
    columns={'Maximum temperature (Degree C)': 'max_temp'}
)
bom_rain = bom_rain[['date', 'Rainfall amount (millimetres)']].rename(
    columns={'Rainfall amount (millimetres)': 'rainfall_mm'}
)

# Merge on date (outer: keep days present in either file), drop rows where
# both measurements are missing, and order chronologically so the positional
# interpolation below runs along the time axis.
weather = (
    bom_temp
    .merge(bom_rain, on='date', how='outer')
    .dropna(subset=['max_temp', 'rainfall_mm'], how='all')
    .sort_values('date')
)

# Fill missing rainfall with 0 (no recorded rain = no rain)
# NOTE(review): this treats an unrecorded day as a dry day — confirm that is
# acceptable for the rainfall-based features built later.
weather['rainfall_mm'] = weather['rainfall_mm'].fillna(0)

# Interpolate short temp gaps (at most 3 consecutive missing values).
# limit_area='inside' restricts the fill to gaps that have a real observation
# on both sides; without it, missing values after the last observation are
# padded with that last temperature, which invents data rather than bridging a gap.
weather['max_temp'] = weather['max_temp'].interpolate(
    method='linear', limit=3, limit_area='inside'
)

print(f"Weather shape: {weather.shape}")
print(f"\nMissing values:\n{weather.isnull().sum()}")
print(f"\nDate range: {weather['date'].min()} to {weather['date'].max()}")
print(f"\nSample:")
print(weather.head())

# %% Save weather
weather.to_csv("../data/processed/weather_cleaned.csv", index=False)
print("Weather saved.")
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
site_iddateavg_soil_moistureLatitudeLongitude
0649702023-09-0233.529375-37.7864144.96259
1649702023-09-0333.350313-37.7864144.96259
2649702023-09-0433.352187-37.7864144.96259
3649702023-09-0533.262500-37.7864144.96259
4649702023-09-0633.066875-37.7864144.96259
\n", + "
# %% Clean soil sensor data — load only moisture
soil = pd.read_csv("../data/raw/soil-sensor-readings-historical-data.csv")
soil_locations = pd.read_csv("../data/raw/soil-sensor-locations.csv")

# Filter to just soil moisture readings
soil_moisture = soil[soil['Unit'] == '%VWC'].copy()

# Parse timestamp
# NOTE(review): parsing with utc=True and then taking .dt.date gives the UTC
# calendar day. If 'Local_Time' carries a local offset, the daily buckets are
# shifted relative to local days — confirm the raw format before changing it.
soil_moisture['Local_Time'] = pd.to_datetime(soil_moisture['Local_Time'], utc=True)
soil_moisture['date'] = soil_moisture['Local_Time'].dt.date

# Aggregate to daily average moisture per site
soil_daily = (
    soil_moisture
    .groupby(['Site_ID', 'date'], as_index=False)
    .agg(avg_soil_moisture=('Soil_Value', 'mean'))
    .rename(columns={'Site_ID': 'site_id'})
)

# Join to locations for coordinates. Keep one coordinate row per site first:
# a left merge against a table that lists a site more than once would
# silently duplicate every daily row of that site.
site_coords = (
    soil_locations[['Site_ID', 'Latitude', 'Longitude']]
    .drop_duplicates(subset=['Site_ID'])
)
soil_daily = soil_daily.merge(
    site_coords,
    left_on='site_id',
    right_on='Site_ID',
    how='left'
).drop(columns=['Site_ID'])

print(f"Daily soil moisture shape: {soil_daily.shape}")
print(f"\nMissing values:\n{soil_daily.isnull().sum()}")
print(f"\nSample:")
print(soil_daily.head())

# %% Save soil data
soil_daily.to_csv("../data/processed/soil_daily.csv", index=False)
print("Soil data saved.")

# %% Coordinate Reference System (CRS)
RAW_CRS = "EPSG:4326"      # Raw GPS coordinates are WGS84
PROJECT_CRS = "EPSG:7844"  # GDA2020 for Melbourne
# NOTE(review): EPSG:7844 is a geographic CRS (units are degrees). Distance
# based operations such as nearest-neighbour joins need a projected CRS to
# return metres.


def to_point_gdf(frame, lon_col, lat_col):
    """Return `frame` as a point GeoDataFrame in PROJECT_CRS.

    Points are built from the `lon_col` / `lat_col` columns, declared as
    RAW_CRS, and then reprojected to PROJECT_CRS.
    """
    return gpd.GeoDataFrame(
        frame,
        geometry=gpd.points_from_xy(frame[lon_col], frame[lat_col]),
        crs=RAW_CRS,
    ).to_crs(PROJECT_CRS)


# Check current CRS of trees
print(f"Trees CRS: {trees.crs}")

# Set project CRS
trees = trees.to_crs(PROJECT_CRS)
print(f"Trees reprojected to: {trees.crs}")

# Convert sensor data to GeoDataFrame and set CRS
sensor_daily = pd.read_csv("../data/processed/sensors_daily.csv")
sensor_gdf = to_point_gdf(sensor_daily, 'lon', 'lat')
print(f"Sensors reprojected to: {sensor_gdf.crs}")

# Convert soil data to GeoDataFrame and set CRS
soil_daily = pd.read_csv("../data/processed/soil_daily.csv")
soil_gdf = to_point_gdf(soil_daily, 'Longitude', 'Latitude')
print(f"Soil reprojected to: {soil_gdf.crs}")
# %% Spatial joins for cleaned data — load inputs
import pandas as pd
import geopandas as gpd

RAW_CRS = "EPSG:4326"     # sensor lat/lon columns are WGS84
METRIC_CRS = "EPSG:7855"  # GDA2020 / MGA zone 55: projected, units are metres
OUTPUT_CRS = "EPSG:7844"  # GDA2020 geographic, the CRS the saved file has always used

# Load cleaned data
trees = gpd.read_file("../data/processed/trees_cleaned.geojson")
sensor_daily = pd.read_csv("../data/processed/sensors_daily.csv")
soil_daily = pd.read_csv("../data/processed/soil_daily.csv")
weather = pd.read_csv("../data/processed/weather_cleaned.csv")

# Reproject trees to a PROJECTED CRS. sjoin_nearest measures distance in the
# units of the CRS; in the geographic EPSG:7844 those units are degrees, so the
# "*_distance_m" columns held values like 0.014 rather than metres, and
# geopandas warned that the results are likely incorrect.
trees = trees.to_crs(METRIC_CRS)


def unique_locations_gdf(daily, id_col, lat_col, lon_col):
    """Build one point per sensor from a per-day table.

    Keeps the first row for each `id_col` value, turns its `lon_col` /
    `lat_col` (WGS84) into a point, and reprojects to METRIC_CRS so it can be
    distance-joined against `trees`.
    """
    locations = daily.drop_duplicates(subset=[id_col])[[id_col, lat_col, lon_col]]
    return gpd.GeoDataFrame(
        locations,
        geometry=gpd.points_from_xy(locations[lon_col], locations[lat_col]),
        crs=RAW_CRS,
    ).to_crs(METRIC_CRS)


# Get unique sensor locations (one row per sensor not per day)
sensor_locations_gdf = unique_locations_gdf(sensor_daily, 'sensor_location', 'lat', 'lon')

# Get unique soil sensor locations
soil_locations_gdf = unique_locations_gdf(soil_daily, 'site_id', 'Latitude', 'Longitude')

print(f"Trees: {trees.shape[0]}")
print(f"Unique microclimate sensors: {sensor_locations_gdf.shape[0]}")
print(f"Unique soil sensors: {soil_locations_gdf.shape[0]}")


# %% Nearest-sensor joins
def join_nearest(left, right, distance_col):
    """Attach the nearest `right` feature to every row of `left`.

    Adds the columns of `right` plus `distance_col` (metres, because both
    inputs are in METRIC_CRS). The helper 'index_right' column that
    sjoin_nearest creates is dropped, and when two sensors are exactly
    equidistant only the first match is kept, so the result has one row per
    input row.
    """
    joined = gpd.sjoin_nearest(left, right, how='left', distance_col=distance_col)
    joined = joined.drop(columns=['index_right'])
    return joined[~joined.index.duplicated(keep='first')]


# Join each tree to its nearest microclimate sensor
trees_with_sensors = join_nearest(
    trees,
    sensor_locations_gdf[['sensor_location', 'geometry']],
    'sensor_distance_m',
)

print(f"Trees with nearest microclimate sensor: {trees_with_sensors.shape}")
print(f"\nDistance to nearest sensor (metres):")
print(trees_with_sensors['sensor_distance_m'].describe())

# Join each tree to its nearest soil sensor
trees_with_sensors = join_nearest(
    trees_with_sensors,
    soil_locations_gdf[['site_id', 'geometry']],
    'soil_sensor_distance_m',
)

print(f"\nTrees with both sensors: {trees_with_sensors.shape}")
print(f"\nDistance to nearest soil sensor (metres):")
print(trees_with_sensors['soil_sensor_distance_m'].describe())

# %% Check columns and save
print("Columns:")
print(trees_with_sensors.columns.tolist())

# Save the joined tree data. Geometry is converted back to OUTPUT_CRS so the
# file keeps the coordinates downstream notebooks already expect; only the
# distance columns (now true metres) and the removed 'index_right' differ.
trees_with_sensors.to_crs(OUTPUT_CRS).to_file(
    "../data/processed/trees_with_sensors.geojson", driver="GeoJSON"
)
print(f"\nSaved: {trees_with_sensors.shape}")
import pandas as pd
import geopandas as gpd

# Tree table already joined to its nearest microclimate and soil sensors
trees = gpd.read_file("../data/processed/trees_with_sensors.geojson")

# Daily weather, with the date column converted to real timestamps
weather = pd.read_csv("../data/processed/weather_cleaned.csv")
weather['date'] = pd.to_datetime(weather['date'])

# Per-day sensor and soil aggregates
sensor_daily = pd.read_csv("../data/processed/sensors_daily.csv")
soil_daily = pd.read_csv("../data/processed/soil_daily.csv")

# Report the size of every input so a bad load is obvious at a glance
for label, frame in (
    ("Trees", trees),
    ("Weather", weather),
    ("Sensor daily", sensor_daily),
    ("Soil daily", soil_daily),
):
    print(f"{label}: {frame.shape}")
# %% Weather feature engineering
HOT_DAY_THRESHOLD_C = 35   # a day counts as "hot" at or above this max temp
HEATWAVE_MIN_DAYS = 3      # consecutive hot days needed to flag a heatwave


def run_length(flag):
    """Position within each run of consecutive 1s in a 0/1 Series.

    Returns 1 for the first row of a run of 1s, 2 for the second, and so on;
    rows where `flag` is 0 get 0.
    """
    run_id = (flag != flag.shift()).cumsum()  # new id whenever the value changes
    position = flag.groupby(run_id).cumcount() + 1
    return position.where(flag == 1, 0)


weather = weather.sort_values('date')

# Rolling temperature averages over the previous 7 / 14 / 30 ROWS.
# NOTE(review): windows are row based, so they equal calendar days only if
# the table has exactly one row per day with no missing dates — confirm.
weather['avg_temp_7d'] = weather['max_temp'].rolling(7).mean()
weather['avg_temp_14d'] = weather['max_temp'].rolling(14).mean()
weather['avg_temp_30d'] = weather['max_temp'].rolling(30).mean()

# Days since last rainfall: 0 on a rainy day, then 1, 2, 3, ... on the dry
# days that follow. Previously the first dry day was also 0 (the counter was
# not offset by one, unlike consec_hot_days), so it was indistinguishable from
# a rainy day and every dry-spell length was one day short.
weather['had_rain'] = (weather['rainfall_mm'] > 0).astype(int)
weather['days_since_rain'] = run_length(1 - weather['had_rain'])

# Heatwave features
weather['above_35'] = (weather['max_temp'] >= HOT_DAY_THRESHOLD_C).astype(int)
weather['heat_degrees'] = (weather['max_temp'] - HOT_DAY_THRESHOLD_C).clip(lower=0)
weather['heat_degree_days_14d'] = weather['heat_degrees'].rolling(14).sum()

# Consecutive hot days (1 on the first hot day of a streak, 0 on non-hot days)
weather['consec_hot_days'] = run_length(weather['above_35'])
weather['heatwave_flag'] = (weather['consec_hot_days'] >= HEATWAVE_MIN_DAYS).astype(int)

print("Weather features created:")
print(weather[['date', 'max_temp', 'avg_temp_7d', 'days_since_rain', 'consec_hot_days', 'heatwave_flag']].tail(10))
# Summarise sensor data per location
#
# Collapses the daily sensor readings to one long-run mean per
# `sensor_location` and attaches those means to every tree at that location.
SENSOR_FEATURES = ['sensor_avg_temp', 'sensor_avg_humidity']

# Named aggregation sets the output column names explicitly instead of
# relying on the positional order of a later `.columns = [...]` assignment.
sensor_summary = (
    sensor_daily
    .groupby('sensor_location')
    .agg(
        sensor_avg_temp=('avg_temp', 'mean'),
        sensor_avg_humidity=('avg_humidity', 'mean'),
    )
    .reset_index()
)

# Merge to trees
# Dropping any summary columns left by a previous run keeps this cell
# re-runnable: otherwise a second run produces *_x / *_y columns and the
# null check below raises KeyError.
# validate='many_to_one' asserts the summary has one row per location, so the
# left join can never multiply tree rows.
trees = (
    trees
    .drop(columns=SENSOR_FEATURES, errors='ignore')
    .merge(sensor_summary, on='sensor_location', how='left', validate='many_to_one')
)

print(f"Trees after sensor merge: {trees.shape}")
print(f"\nMissing sensor values:")
print(trees[SENSOR_FEATURES].isnull().sum())


# Summarise soil data per site
#
# Same pattern as the sensor summary: one mean soil-moisture value per
# `site_id`, attached to every tree at that site.
SOIL_FEATURES = ['avg_soil_moisture']

soil_summary = (
    soil_daily
    .groupby('site_id')
    .agg(avg_soil_moisture=('avg_soil_moisture', 'mean'))
    .reset_index()
)

# Merge to trees (stale columns dropped first so the cell can be re-run)
trees = (
    trees
    .drop(columns=SOIL_FEATURES, errors='ignore')
    .merge(soil_summary, on='site_id', how='left', validate='many_to_one')
)

print(f"Trees after soil merge: {trees.shape}")
print(f"\nMissing soil values:")
print(trees[SOIL_FEATURES].isnull().sum())
from pathlib import Path

# Get most recent weather features
#
# Maps each tree-level column to the engineered weather column it is copied from.
WEATHER_TO_TREE_COLUMNS = {
    'max_temp_latest': 'max_temp',
    'avg_temp_7d': 'avg_temp_7d',
    'avg_temp_14d': 'avg_temp_14d',
    'avg_temp_30d': 'avg_temp_30d',
    'days_since_rain': 'days_since_rain',
    'heat_degree_days_14d': 'heat_degree_days_14d',
    'heatwave_flag': 'heatwave_flag',
}

# Only require the columns that are actually copied (plus the date). A bare
# dropna() would discard a recent row for a NaN in any unrelated column and
# silently move the snapshot back to an older day.
required_weather_columns = ['date', *WEATHER_TO_TREE_COLUMNS.values()]
complete_weather = (
    weather
    .dropna(subset=required_weather_columns)
    .sort_values('date')  # do not depend on an earlier cell having sorted the frame
)
if complete_weather.empty:
    raise ValueError(
        "No weather row has all engineered features populated; "
        "cannot take a weather snapshot."
    )
latest_weather = complete_weather.iloc[-1]

# Add weather features to all trees (same weather applies city-wide)
for tree_column, weather_column in WEATHER_TO_TREE_COLUMNS.items():
    trees[tree_column] = latest_weather[weather_column]

print(f"Trees with all features: {trees.shape}")
print(f"\nWeather snapshot date: {latest_weather['date']}")
print(f"Max temp: {latest_weather['max_temp']}°C")
print(f"7-day avg: {latest_weather['avg_temp_7d']:.1f}°C")
print(f"Days since rain: {int(latest_weather['days_since_rain'])}")
print(f"Heatwave: {'Yes' if latest_weather['heatwave_flag'] else 'No'}")


# Create final feature table
#
# Columns are grouped by role; the concatenation order below is the column
# order of the saved table.
ID_COLUMNS = ['com_id']
TREE_COLUMNS = [
    'common_name', 'scientific_name', 'genus', 'family',
    'diameter_breast_height', 'year_planted', 'tree_age',
    'age_description',
]
LOCATION_COLUMNS = ['latitude', 'longitude', 'precinct']
SENSOR_COLUMNS = ['sensor_avg_temp', 'sensor_avg_humidity']   # microclimate sensors
SOIL_COLUMNS = ['avg_soil_moisture']
WEATHER_COLUMNS = list(WEATHER_TO_TREE_COLUMNS)               # city-wide snapshot
TARGET_COLUMNS = ['useful_life_expectency_value', 'risk_class']

feature_table = trees[
    ID_COLUMNS
    + TREE_COLUMNS
    + LOCATION_COLUMNS
    + SENSOR_COLUMNS
    + SOIL_COLUMNS
    + WEATHER_COLUMNS
    + TARGET_COLUMNS
].copy()

print(f"Feature table shape: {feature_table.shape}")
print(f"\nMissing values:\n{feature_table.isnull().sum()}")
print(f"\nTarget distribution:\n{feature_table['risk_class'].value_counts()}")


# Save feature table
FEATURE_TABLE_PATH = Path("../data/processed/feature_table.csv")
# data/ is git-ignored, so the processed/ folder may not exist on a fresh
# clone; create it rather than letting to_csv fail.
FEATURE_TABLE_PATH.parent.mkdir(parents=True, exist_ok=True)
feature_table.to_csv(FEATURE_TABLE_PATH, index=False)
print("Feature table saved!")
print(f"\nFinal summary:")
print(f"  Rows: {feature_table.shape[0]}")
# NOTE(review): this count still includes the com_id identifier column.
print(f"  Features: {feature_table.shape[1] - len(TARGET_COLUMNS)} (excluding target columns)")
print(f"  Target classes: {feature_table['risk_class'].value_counts().to_dict()}")
{ + "kernelspec": { + "display_name": "Urban Forest (venv)", + "language": "python", + "name": "urban-forest" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Playground/AidanPage_T126/notebooks/05_ml_model.ipynb b/Playground/AidanPage_T126/notebooks/05_ml_model.ipynb new file mode 100644 index 0000000000..e593694fcc --- /dev/null +++ b/Playground/AidanPage_T126/notebooks/05_ml_model.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "0ff06e1f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape: (82064, 24)\n", + "\n", + "Target distribution:\n", + "risk_class\n", + "LOW 56003\n", + "MEDIUM 21115\n", + "HIGH 4946\n", + "Name: count, dtype: int64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
com_idcommon_namescientific_namegenusfamilydiameter_breast_heightyear_plantedtree_ageage_descriptionlatitude...avg_soil_moisturemax_temp_latestavg_temp_7davg_temp_14davg_temp_30ddays_since_rainheat_degree_days_14dheatwave_flaguseful_life_expectency_valuerisk_class
01070378Tulip TreeLiriodendron tulipiferaLiriodendronMagnoliaceae20.0200620Mature-37.832567...39.30500327.524.08571422.86071424.30550.0050LOW
11070382Tulip TreeLiriodendron tulipiferaLiriodendronMagnoliaceae21.0200620Mature-37.831669...39.30500327.524.08571422.86071424.30550.0050LOW
21796650Cook pineAraucaria columnarisAraucariaAraucariaceae50.020206Semi-mature-37.802222...29.02120627.524.08571422.86071424.30550.0030MEDIUM
31457913Yellow BoxEucalyptus melliodoraEucalyptusMyrtaceae25.0201016Mature-37.797537...17.67444227.524.08571422.86071424.30550.0050LOW
41457915Yellow BoxEucalyptus melliodoraEucalyptusMyrtaceae22.0201016Mature-37.797540...17.67444227.524.08571422.86071424.30550.0050LOW
\n", + "

5 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " com_id common_name scientific_name genus family \\\n", + "0 1070378 Tulip Tree Liriodendron tulipifera Liriodendron Magnoliaceae \n", + "1 1070382 Tulip Tree Liriodendron tulipifera Liriodendron Magnoliaceae \n", + "2 1796650 Cook pine Araucaria columnaris Araucaria Araucariaceae \n", + "3 1457913 Yellow Box Eucalyptus melliodora Eucalyptus Myrtaceae \n", + "4 1457915 Yellow Box Eucalyptus melliodora Eucalyptus Myrtaceae \n", + "\n", + " diameter_breast_height year_planted tree_age age_description latitude \\\n", + "0 20.0 2006 20 Mature -37.832567 \n", + "1 21.0 2006 20 Mature -37.831669 \n", + "2 50.0 2020 6 Semi-mature -37.802222 \n", + "3 25.0 2010 16 Mature -37.797537 \n", + "4 22.0 2010 16 Mature -37.797540 \n", + "\n", + " ... avg_soil_moisture max_temp_latest avg_temp_7d avg_temp_14d \\\n", + "0 ... 39.305003 27.5 24.085714 22.860714 \n", + "1 ... 39.305003 27.5 24.085714 22.860714 \n", + "2 ... 29.021206 27.5 24.085714 22.860714 \n", + "3 ... 17.674442 27.5 24.085714 22.860714 \n", + "4 ... 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
import warnings

# Silence only deprecation chatter. A blanket filterwarnings('ignore') would
# also hide warnings that matter for the models imported above (for example
# a solver that failed to converge).
for noisy_category in (FutureWarning, DeprecationWarning):
    warnings.filterwarnings('ignore', category=noisy_category)

# Load feature table
# Path of the CSV this cell reads; relative to the notebook's working directory.
FEATURE_TABLE_PATH = "../data/processed/feature_table.csv"
df = pd.read_csv(FEATURE_TABLE_PATH)

print(f"Shape: {df.shape}")
print(f"\nTarget distribution:")
print(df['risk_class'].value_counts())
df.head()
+asttokens==3.0.1 +async-lru==2.0.5 +attrs==25.4.0 +babel==2.18.0 +beautifulsoup4==4.14.3 +bleach==6.2.0 +branca==0.8.2 +certifi==2026.2.25 +cffi==2.0.0 +charset-normalizer==3.4.6 +comm==0.2.3 +contourpy==1.3.0 +cycler==0.12.1 +debugpy==1.8.20 +decorator==5.2.1 +defusedxml==0.7.1 +exceptiongroup==1.3.1 +executing==2.2.1 +fastjsonschema==2.21.2 +folium==0.20.0 +fonttools==4.60.2 +fqdn==1.5.1 +geopandas==1.0.1 +h11==0.16.0 +httpcore==1.0.9 +httpx==0.28.1 +idna==3.11 +importlib_metadata==8.7.1 +importlib_resources==6.5.2 +ipykernel==6.31.0 +ipython==8.18.1 +ipywidgets==8.1.8 +isoduration==20.11.0 +jedi==0.19.2 +Jinja2==3.1.6 +json5==0.13.0 +jsonpointer==3.0.0 +jsonschema==4.25.1 +jsonschema-specifications==2025.9.1 +jupyter==1.1.1 +jupyter-console==6.6.3 +jupyter-events==0.12.0 +jupyter-lsp==2.3.0 +jupyter_client==8.6.3 +jupyter_core==5.8.1 +jupyter_server==2.17.0 +jupyter_server_terminals==0.5.4 +jupyterlab==4.5.6 +jupyterlab_pygments==0.3.0 +jupyterlab_server==2.28.0 +jupyterlab_widgets==3.0.16 +kiwisolver==1.4.7 +lark==1.3.1 +MarkupSafe==3.0.3 +matplotlib==3.9.4 +matplotlib-inline==0.2.1 +mistune==3.2.0 +nbclient==0.10.2 +nbconvert==7.17.0 +nbformat==5.10.4 +nest-asyncio==1.6.0 +notebook==7.5.5 +notebook_shim==0.2.4 +numpy==2.0.2 +overrides==7.7.0 +packaging==26.0 +pandas==2.3.3 +pandocfilters==1.5.1 +parso==0.8.6 +pexpect==4.9.0 +pillow==11.3.0 +platformdirs==4.4.0 +prometheus_client==0.24.1 +prompt_toolkit==3.0.52 +psutil==7.2.2 +ptyprocess==0.7.0 +pure_eval==0.2.3 +pycparser==2.23 +Pygments==2.19.2 +pyogrio==0.11.1 +pyparsing==3.3.2 +pyproj==3.6.1 +python-dateutil==2.9.0.post0 +python-json-logger==4.0.0 +pytz==2026.1.post1 +PyYAML==6.0.3 +pyzmq==27.1.0 +referencing==0.36.2 +requests==2.32.5 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rfc3987-syntax==1.1.0 +rpds-py==0.27.1 +seaborn==0.13.2 +Send2Trash==2.1.0 +shapely==2.0.7 +six==1.17.0 +soupsieve==2.8.3 +stack-data==0.6.3 +terminado==0.18.1 +tinycss2==1.4.0 +tomli==2.4.0 +tornado==6.5.5 
+traitlets==5.14.3 +typing_extensions==4.15.0 +tzdata==2025.3 +uri-template==1.3.0 +urllib3==2.6.3 +wcwidth==0.6.0 +webcolors==24.11.1 +webencodings==0.5.1 +websocket-client==1.9.0 +widgetsnbextension==4.0.15 +xyzservices==2025.11.0 +zipp==3.23.0