Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add visulization script #339

Merged
merged 21 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
43a38ca
add visulization script
peterdudfield Jun 28, 2024
ebc5059
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 28, 2024
8d922c5
upgrade to plot all nwp examples, and make table of times
peterdudfield Jun 28, 2024
2954be1
Merge commit 'ebc50594d00b2c623bb9121c350bdf367e84d88a' into issue/vi…
peterdudfield Jun 28, 2024
2a7dde4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 28, 2024
e86104e
plot GSP
peterdudfield Jun 28, 2024
4983da9
Merge commit '2a7dde452d3aeb666eceb5e98e7520c8a2b2b414' into issue/vi…
peterdudfield Jun 28, 2024
5a72650
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 28, 2024
20e5df2
lint
peterdudfield Jun 28, 2024
cae02db
add satellite visualization
peterdudfield Jun 28, 2024
e708d59
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 28, 2024
caa8e12
upgrade to visualization
peterdudfield Sep 4, 2024
7a48796
Merge commit '9ec252eeee44937c12ab52699579bdcace76e72f' into issue/vi…
peterdudfield Sep 4, 2024
6bab08b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 4, 2024
62c56f0
move to visualise fold and refactor to british spelling
AUdaltsova Sep 26, 2024
6f720a1
add visualise_batch to init
AUdaltsova Sep 26, 2024
73d1561
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 26, 2024
b7d2d64
linting
AUdaltsova Sep 26, 2024
b30d543
linting
AUdaltsova Sep 26, 2024
859546e
Merge remote-tracking branch 'origin/issue/visulaize' into issue/visu…
AUdaltsova Sep 26, 2024
1b336d4
Merge branch 'main' into issue/visulaize
peterdudfield Nov 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
"""Tools for plotting and visualizing data."""

from .batch import visualise_batch
298 changes: 298 additions & 0 deletions ocf_datapipes/visualisation/batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
""" The idea is visualise one of the batches

This is a bit of a work in progress, but the idea is to visualise the batch in a markdown file.
"""

import os

import pandas as pd
import plotly.graph_objects as go
import torch

from ocf_datapipes.batch import BatchKey, NumpyBatch, NWPBatchKey


def _example_count(n_available, limit_examples):
    """Return how many examples to report: all, or at most `limit_examples`."""
    if limit_examples is None:
        return n_available
    return min(n_available, limit_examples)


def _save_figure(f, fig, folder, name):
    """Write `fig` to `{folder}/{name}` and link the image from the report `f`."""
    fig.write_image(f"{folder}/{name}")
    print(f"![](./{name})", file=f)
    print("\n", file=f)


def _write_time_table(f, value, n_examples):
    """Write a markdown table with length, max and min timestamp per example."""
    print("| Example | Shape | Max | Min |", file=f)
    print("| --- | --- | --- | --- |", file=f)
    for example_id in range(n_examples):
        # values are interpreted as seconds since the unix epoch
        value_ts = pd.to_datetime(value[example_id], unit="s")
        print(
            f"| {example_id} | {len(value_ts)} " f"| {value_ts.max()} | {value_ts.min()} |",
            file=f,
        )


def _write_value_summary(f, value, label="Shape"):
    """Write shape/max/min for a tensor, or the raw value for anything else."""
    if isinstance(value, torch.Tensor):
        # NOTE: the local must stay named `value`; `{value.shape=}` prints the expression text
        print(f"{label} {value.shape=}", file=f)
        print(f"Max {value.max():.2f}", file=f)
        print(f"Min {value.min():.2f}", file=f)
    else:
        print(f"{value}", file=f)


def visualise_batch(batch: NumpyBatch, folder=".", output_file="report.md", limit_examples=None):
    """Visualise the batch in a markdown file.

    Writes a report with Wind, GSP, NWP and Satellite sections to
    `{folder}/{output_file}` and saves the plots it links to as PNG files in the
    `gsp`, `nwp` and `satellite` sub-folders of `folder`. Keys (and the whole NWP
    modality) that are not present in the batch are skipped.

    Args:
        batch: Mapping from `BatchKey` to values. Values that are `torch.Tensor`
            get a shape/max/min summary; the plotted GSP, NWP and satellite data
            must support tensor indexing and `.mean(dim=...)`.
        folder: Output directory; it and its sub-folders are created if missing.
        output_file: Name of the markdown report inside `folder`. The file is
            opened in append mode, so repeated calls add to an existing report.
        limit_examples: If not None, the maximum number of examples that are
            plotted and tabulated per modality.
    """

    # create dirs if they do not exist (exist_ok avoids a check-then-create race)
    for d in [folder, f"{folder}/gsp", f"{folder}/nwp", f"{folder}/satellite"]:
        os.makedirs(d, exist_ok=True)

    with open(f"{folder}/{output_file}", "a") as f:
        print("# Batch visualisation", file=f)

        # Wind
        print("## Wind \n", file=f)
        keys = [
            BatchKey.wind,
            BatchKey.wind_t0_idx,
            BatchKey.wind_time_utc,
            BatchKey.wind_id,
            BatchKey.wind_observed_capacity_mwp,
            BatchKey.wind_nominal_capacity_mwp,
            BatchKey.wind_latitude,
            BatchKey.wind_longitude,
            BatchKey.wind_solar_azimuth,
            BatchKey.wind_solar_elevation,
        ]
        for key in keys:
            if key not in batch:
                continue
            print("\n", file=f)
            value = batch[key]
            if isinstance(value, torch.Tensor):
                print(f"{key} {value.shape=}", file=f)
                print(f"Max {value.max()}", file=f)
                print(f"Min {value.min()}", file=f)
            else:
                print(f"{key} {value}", file=f)

        # GSP
        print("## GSP \n", file=f)
        keys = [
            BatchKey.gsp,
            BatchKey.gsp_id,
            BatchKey.gsp_time_utc,
            BatchKey.gsp_time_utc_fourier,
            BatchKey.gsp_x_osgb,
            BatchKey.gsp_x_osgb_fourier,
            BatchKey.gsp_y_osgb,
            BatchKey.gsp_y_osgb_fourier,
            BatchKey.gsp_t0_idx,
            BatchKey.gsp_effective_capacity_mwp,
            BatchKey.gsp_nominal_capacity_mwp,
            BatchKey.gsp_solar_azimuth,
            BatchKey.gsp_solar_elevation,
        ]
        for key in keys:
            if key not in batch:
                continue
            print("\n", file=f)
            print(f"### {key.name}", file=f)
            value = batch[key]
            if key.name == "gsp":
                # plot gsp data, one figure per example
                n_examples = _example_count(value.shape[0], limit_examples)
                for b in range(n_examples):
                    fig = go.Figure()
                    gsp_data = value[b, :, 0]
                    time = pd.to_datetime(batch[BatchKey.gsp_time_utc][b], unit="s")
                    fig.add_trace(go.Scatter(x=time, y=gsp_data, mode="lines", name="GSP"))
                    fig.update_layout(
                        title=f"GSP - example {b}", xaxis_title="Time", yaxis_title="Value"
                    )
                    _save_figure(f, fig, folder, f"gsp/gsp_{b}.png")
            else:
                _write_value_summary(f, value, label="shape")

        # TODO plot solar azimuth and elevation

        # NWP
        print("## NWP \n", file=f)

        keys = [
            NWPBatchKey.nwp,
            NWPBatchKey.nwp_target_time_utc,
            NWPBatchKey.nwp_channel_names,
            NWPBatchKey.nwp_step,
            NWPBatchKey.nwp_t0_idx,
            NWPBatchKey.nwp_init_time_utc,
        ]

        # a batch without NWP data simply produces an empty NWP section
        nwp = batch[BatchKey.nwp] if BatchKey.nwp in batch else {}

        for provider, nwp_provider in nwp.items():
            print("\n", file=f)
            print(f"### Provider {provider}", file=f)

            # plot nwp main data, averaged over dims 3 and 4
            # (presumably example, time, channel, y, x - TODO confirm)
            nwp_data = nwp_provider[NWPBatchKey.nwp].mean(dim=(3, 4))
            channel_names = nwp_provider[NWPBatchKey.nwp_channel_names]

            n_examples = _example_count(nwp_data.shape[0], limit_examples)

            for b in range(n_examples):
                fig = go.Figure()
                # the time axis is shared by all channels of one example
                time = pd.to_datetime(nwp_provider[NWPBatchKey.nwp_target_time_utc][b], unit="s")
                for i, channel in enumerate(channel_names):
                    fig.add_trace(
                        go.Scatter(x=time, y=nwp_data[b, :, i], mode="lines", name=channel)
                    )

                fig.update_layout(
                    title=f"{provider} NWP - example {b}", xaxis_title="Time", yaxis_title="Value"
                )
                _save_figure(f, fig, folder, f"nwp/{provider}_nwp_{b}.png")

            for key in keys:
                if key not in nwp_provider:
                    continue
                print("\n", file=f)
                print(f"#### {key.name}", file=f)
                value = nwp_provider[key]

                if "time" in key.name:
                    _write_time_table(f, value, n_examples)

                elif "channel" in key.name:
                    # create a table with the channel names with max, min, mean and std
                    print("| Channel | Max | Min | Mean | Std |", file=f)
                    print("| --- | --- | --- | --- | --- |", file=f)
                    for i, channel in enumerate(value):
                        data = nwp_data[:, :, i]
                        print(
                            f"| {channel} "
                            f"| {data.max().item():.2f} "
                            f"| {data.min().item():.2f} "
                            f"| {data.mean().item():.2f} "
                            f"| {data.std().item():.2f} |",
                            file=f,
                        )

                    # channel names may be a plain list, which has no `.shape`
                    shape = getattr(value, "shape", (len(value),))
                    print(f"Shape={shape}", file=f)

                else:
                    _write_value_summary(f, value)

        # Satellite
        print("## Satellite \n", file=f)
        keys = [
            BatchKey.satellite_actual,
            BatchKey.satellite_t0_idx,
            BatchKey.satellite_time_utc,
            BatchKey.satellite_x_geostationary,
            BatchKey.satellite_y_geostationary,
        ]

        for key in keys:
            if key not in batch:
                continue

            print("\n", file=f)
            print(f"#### {key.name}", file=f)
            value = batch[key]

            if "satellite_actual" in key.name:
                print(value.shape, file=f)

                # average over dims 3 and 4
                # (presumably example, time, channel, y, x - TODO confirm)
                value = value.mean(dim=(3, 4))

                n_examples = _example_count(value.shape[0], limit_examples)

                for b in range(n_examples):
                    fig = go.Figure()
                    # the time axis is shared by all channels of one example
                    time = pd.to_datetime(batch[BatchKey.satellite_time_utc][b], unit="s")
                    for i in range(value.shape[2]):
                        fig.add_trace(go.Scatter(x=time, y=value[b, :, i], mode="lines"))

                    fig.update_layout(
                        title=f"Satellite - example {b}", xaxis_title="Time", yaxis_title="Value"
                    )
                    _save_figure(f, fig, folder, f"satellite/satellite_{b}.png")

            elif "time" in key.name:
                # derive the count from this value rather than relying on a previous branch
                n_examples = _example_count(len(value), limit_examples)
                _write_time_table(f, value, n_examples)

            else:
                _write_value_summary(f, value)


# For example you can run it like this
# (the report is written to f"{folder}/{output_file}", so stdout does not need redirecting)
# d = torch.load("000000.pt")
# visualise_batch(d, folder=".", output_file="report.md")
1 change: 0 additions & 1 deletion ocf_datapipes/visualization/batch.py

This file was deleted.