Skip to content

Feat: 5-min data visualization UI #201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pems_data/src/pems_data/services/stations.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def get_imputed_agg_5min(self, station_id: str) -> pd.DataFrame:
value (pandas.DataFrame): The station's data as a DataFrame.
"""

cache_opts = {"key": self._build_cache_key("imputed", "agg", "5m", "station", station_id), "ttl": 300} # 5 minutes
cache_opts = {"key": self._build_cache_key("imputed", "agg", "5m", "station", station_id), "ttl": 3600} # 1 hour
columns = [
"STATION_ID",
"LANE",
Expand Down
1 change: 1 addition & 0 deletions pems_streamlit/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dependencies = [
# local package reference
# a wheel for this package is built during Docker build
"pems_data",
"plotly==6.2.0",
"streamlit==1.45.1",
]

Expand Down
103 changes: 103 additions & 0 deletions pems_streamlit/src/pems_streamlit/apps/districts/app_stations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import re

import pandas as pd
import streamlit as st

from pems_data import ServiceFactory

from pems_streamlit.components.map_station_summary import map_station_summary
from pems_streamlit.components.plot_5_min_traffic_data import plot_5_min_traffic_data

# Module-level service handles, created once at import time and shared by the
# loader functions in this module.
FACTORY = ServiceFactory()
# Stations service: used for district metadata and 5-minute station data.
STATIONS = FACTORY.stations_service()
# S3 data source: used to list the available "day=" prefixes.
S3 = FACTORY.s3_source


@st.cache_data(ttl=3600)  # Streamlit keeps the result for one hour
def load_station_metadata(district_number: str) -> pd.DataFrame:
    """Fetch station metadata for a single district.

    Args:
        district_number: District identifier, forwarded unchanged to the
            stations service.

    Returns:
        The result of ``STATIONS.get_district_metadata`` for that district.
    """
    district_metadata = STATIONS.get_district_metadata(district_number)
    return district_metadata


@st.cache_data(ttl=3600)  # Streamlit keeps the result for one hour
def get_available_days() -> set:
    """Return the days for which 5-minute data prefixes exist.

    The S3 source is asked for prefixes under the stations service's
    ``imputation_detector_agg_5min`` location; every ``day=<digits>`` match
    is converted to an ``int`` by the supplied match function.
    """
    # "day=" followed by one or more digits; the digits are capture group 1.
    day_pattern = re.compile(r"day=(\d+)")

    return S3.get_prefixes(
        day_pattern,
        initial_prefix=STATIONS.imputation_detector_agg_5min,
        # Keep only the captured digits, as an integer.
        match_func=lambda found: int(found.group(1)),
    )


@st.cache_data(ttl=3600)  # Streamlit keeps the result for one hour
def load_station_data(station_id: str) -> pd.DataFrame:
    """Fetch the imputed, aggregated 5-minute data for one station.

    Args:
        station_id: Station identifier, forwarded unchanged to the stations
            service.

    Returns:
        The result of ``STATIONS.get_imputed_agg_5min`` for that station.
    """
    station_frame = STATIONS.get_imputed_agg_5min(station_id)
    return station_frame


# --- STREAMLIT APP ---


def main():
    """Render the station explorer page for one district.

    The district is taken from the ``district_number`` query parameter
    (empty string when absent). The page shows a map/summary of the selected
    station, selectors for station, quantity, lane and day, and - once
    "Load Station Data" is pressed with a valid selection - a Plotly chart of
    the station's 5-minute data filtered to the chosen days and lanes.
    """
    district_number = st.query_params.get("district_number", "")

    df_station_metadata = load_station_metadata(district_number)

    # With no station rows, looking up the first matching row further down
    # would raise IndexError; show a readable message instead of a traceback.
    if df_station_metadata.empty:
        st.warning("No stations are available for this district.")
        return

    # Reserve the top slot for the map; it is filled after the station is known.
    map_placeholder = st.empty()

    station = st.selectbox(
        "Station",
        df_station_metadata["STATION_ID"],
    )

    quantity = st.multiselect("Quantity", ["VOLUME_SUM", "OCCUPANCY_AVG", "SPEED_FIVE_MINS"])

    # Select the station's metadata once and reuse it for the lane count and the map.
    df_selected_station = df_station_metadata[df_station_metadata["STATION_ID"] == station]
    num_lanes = int(df_selected_station["PHYSICAL_LANES"].iloc[0])
    lane = st.multiselect(
        "Lane",
        list(range(1, num_lanes + 1)),
    )

    with map_placeholder:
        map_station_summary(df_selected_station)

    # get_available_days() is annotated as returning a set, which has no
    # defined order; sort so the options are listed ascending and stably.
    days = st.multiselect("Days", sorted(get_available_days()))

    station_data_button = st.button("Load Station Data", type="primary")

    error_placeholder = st.empty()
    plot_placeholder = st.empty()

    if not station_data_button:
        return

    # Collect every validation problem so they are reported together.
    error_messages = []
    if not 1 <= len(quantity) <= 2:
        error_messages.append("- Please select one or two quantities to proceed.")
    if not lane:
        error_messages.append("- Please select at least one lane to proceed.")
    if not days:
        error_messages.append("- Please select at least one day to proceed.")

    if error_messages:
        error_placeholder.error("\n".join(error_messages))
        return

    df_station_data = load_station_data(station)
    # Keep rows whose timestamp day-of-month is one of the chosen days and
    # whose lane is one of the chosen lanes.
    selection_mask = df_station_data["SAMPLE_TIMESTAMP"].dt.day.isin(days) & df_station_data["LANE"].isin(lane)
    filtered_df_sorted = df_station_data[selection_mask].sort_values(by="SAMPLE_TIMESTAMP")

    fig = plot_5_min_traffic_data(filtered_df_sorted, quantity, lane)
    plot_placeholder.plotly_chart(fig, use_container_width=True)


# Run the app only when this module is executed as the main script.
if __name__ == "__main__":
    main()
67 changes: 0 additions & 67 deletions pems_streamlit/src/pems_streamlit/apps/stations/app_stations.py

This file was deleted.

Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pandas as pd
import streamlit as st


def map_station_summary(df_station_metadata: pd.DataFrame):
    """Render a map and a details card for a single station.

    The left column shows the station location on a map (rows without
    latitude/longitude are dropped first); the right column shows a bordered
    card with the station's identifying fields.

    Args:
        df_station_metadata: Station metadata. Each displayed field is read
            with ``Series.item()``, so the frame is expected to contain
            exactly one row.

    Raises:
        ValueError: From ``Series.item()`` when the frame does not contain
            exactly one row.
    """

    def field(column: str):
        # Single accessor so the f-strings below need no nested quotes; reusing
        # the enclosing quote inside a replacement field only parses on
        # Python 3.12+.
        return df_station_metadata[column].item()

    map_col, info_col = st.columns([0.6, 0.4])

    with map_col:
        # st.map is given columns named "latitude"/"longitude".
        map_df = df_station_metadata.rename(columns={"LATITUDE": "latitude", "LONGITUDE": "longitude"})
        map_df_cleaned = map_df.dropna(subset=["latitude", "longitude"])
        st.map(map_df_cleaned[["latitude", "longitude"]], height=265)

    with info_col:
        with st.container(border=True):
            st.markdown(f"**Station {field('STATION_ID')} - {field('NAME')}**")
            st.markdown(f"{field('FREEWAY')} - {field('DIRECTION')}, {field('CITY_NAME')}")
            st.markdown(f"**County** {field('COUNTY_NAME')}")
            st.markdown(f"**District** {field('DISTRICT')}")
            st.markdown(f"**Absolute Post Mile** {field('ABSOLUTE_POSTMILE')}")
            st.markdown(f"**Lanes** {field('PHYSICAL_LANES')}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import pandas as pd
import plotly.graph_objs as go

# Display configuration per plottable quantity, keyed by the data column name.
# "name" is the human-readable label; plot_5_min_traffic_data uses it for the
# chart title and axis titles, and its first word for trace legend names.
QUANTITY_CONFIG = {
    "VOLUME_SUM": {"name": "Volume (veh/hr)"},
    "OCCUPANCY_AVG": {"name": "Occupancy (%)"},
    "SPEED_FIVE_MINS": {"name": "Speed (mph)"},
}


def plot_5_min_traffic_data(df_station_data: pd.DataFrame, quantities: list, lanes: list):
    """Build a Plotly line chart of per-lane station data.

    Args:
        df_station_data: Station rows; the ``LANE`` and ``SAMPLE_TIMESTAMP``
            columns and one column per requested quantity are read.
        quantities: Keys of ``QUANTITY_CONFIG``. One key gives a single
            y-axis; two keys put the first on the left axis and the second on
            an overlaid right axis. Any other length adds no traces and only
            the base layout is applied.
        lanes: Lane values; one trace per lane and quantity is drawn.

    Returns:
        The populated ``go.Figure``.
    """
    figure = go.Figure()

    layout = {
        "xaxis": dict(title="Time of Day"),
        "legend": dict(orientation="h", yanchor="top", y=-0.3, xanchor="center", x=0.5),
    }

    def lane_trace(lane_rows, lane_id, column, label, **axis_options):
        # One line per lane/quantity; the legend uses the label's first word.
        return go.Scatter(
            x=lane_rows["SAMPLE_TIMESTAMP"],
            y=lane_rows[column],
            mode="lines",
            name=f"Lane {lane_id} {label.split(' ')[0]}",
            **axis_options,
        )

    selected_count = len(quantities)

    if selected_count == 1:
        # Single quantity: everything shares the left axis.
        column = quantities[0]
        label = QUANTITY_CONFIG[column]["name"]

        for lane_id in lanes:
            lane_rows = df_station_data[df_station_data["LANE"] == lane_id]
            figure.add_trace(lane_trace(lane_rows, lane_id, column, label))

        layout["title"] = dict(text=f"<b>{label}</b>", x=0.5, xanchor="center")
        layout["yaxis"] = dict(title=f"<b>{label}</b>", side="left")

    elif selected_count == 2:
        # Two quantities: first on the left axis, second on the right ("y2").
        left_column, right_column = quantities[0], quantities[1]
        left_label = QUANTITY_CONFIG[left_column]["name"]
        right_label = QUANTITY_CONFIG[right_column]["name"]

        for lane_id in lanes:
            lane_rows = df_station_data[df_station_data["LANE"] == lane_id]
            figure.add_trace(lane_trace(lane_rows, lane_id, left_column, left_label))
            figure.add_trace(lane_trace(lane_rows, lane_id, right_column, right_label, yaxis="y2"))

        layout["title"] = dict(text=f"<b>{left_label} vs. {right_label}</b>", x=0.5, xanchor="center")
        layout["yaxis"] = dict(title=f"<b>{left_label}</b>", side="left")
        layout["yaxis2"] = dict(title=f"<b>{right_label}</b>", side="right", overlaying="y")

    figure.update_layout(**layout)

    return figure
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,11 @@
{% endblock headline %}

{% block districts-content %}
<div class="row">
<div class="col-lg-4 border">
<h2>Form</h2>
</div>
</div>
<div class="row" style="min-height: 450px;">
<div class="col-lg-12 border">
<div class="col-lg-12">
<iframe title="District {{ current_district.number }} visualization"
class="w-100 h-100"
src="{{ streamlit.url }}/stations--stations?embed=true&district_number={{ current_district.number }}">
src="{{ streamlit.url }}/districts--stations?embed=true&district_number={{ current_district.number }}">
</iframe>
</div>
</div>
Expand Down
12 changes: 2 additions & 10 deletions pems_web/src/pems_web/districts/templates/districts/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,9 @@
</div>
<div class="col-lg-10 pb-lg-5">
{% block districts-content %}
<div class="row">
<div class="col-lg-4 border">
<h2>Form</h2>
</div>
<div class="col-lg-8 border">
<h2>Chart</h2>
</div>
</div>
<div class="row" style="min-height: 450px;">
<div class="col-lg-12 border">
<iframe title="District visualizations" class="w-100 h-100" src="{{ streamlit.url }}/stations--stations?embed=true">
<div class="col-lg-12">
<iframe title="District visualizations" class="w-100 h-100" src="{{ streamlit.url }}/districts--stations?embed=true">
</iframe>
</div>
</div>
Expand Down
2 changes: 1 addition & 1 deletion tests/pytest/pems_data/services/test_stations.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,6 @@ def test_get_imputed_agg_5min(self, service: StationsService, data_source: IData

cache_opts = data_source.read.call_args.kwargs["cache_opts"]
assert station_id in cache_opts["key"]
assert cache_opts["ttl"] == 300
assert cache_opts["ttl"] == 3600

pd.testing.assert_frame_equal(result, df)
Loading