forked from openclimatefix/pv-site-datamodel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsqlmodels.py
369 lines (284 loc) · 12.1 KB
/
sqlmodels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
"""SQLAlchemy definition of the pvsite database schema."""
from __future__ import annotations
# This means we can use Typing of objects that have jet to be defined
import enum
import uuid
from datetime import datetime
from typing import List
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, declarative_base, relationship
from sqlalchemy.schema import UniqueConstraint
Base = declarative_base()
class CreatedMixin:
"""Mixin to add created datetime to model."""
created_utc = sa.Column(sa.DateTime, default=lambda: datetime.utcnow())
class UserSQL(Base, CreatedMixin):
"""Class representing the users table.
Each user row specifies a single user.
"""
__tablename__ = "users"
__tables_args__ = (UniqueConstraint("email", name="idx_email"),)
user_uuid = sa.Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
email = sa.Column(sa.String(255), index=True, unique=True)
site_group_uuid = sa.Column(
UUID(as_uuid=True),
sa.ForeignKey("site_groups.site_group_uuid"),
nullable=False,
comment="The foreign key to the site_groups table",
)
# Relationships
site_group: Mapped["SiteGroupSQL"] = relationship("SiteGroupSQL", back_populates="users")
api_request = relationship("APIRequestSQL", back_populates="user")
class SiteGroupSQL(Base, CreatedMixin):
"""Class representing the site_groups table.
Each site_group row specifies a single group of sites.
"""
__tablename__ = "site_groups"
site_group_uuid = sa.Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
site_group_name = sa.Column(sa.String(255), index=True, unique=True)
# Relationships
# N-N
sites: Mapped[List["SiteSQL"]] = relationship(
"SiteSQL", secondary="site_group_sites", back_populates="site_groups"
)
# 1-N, one site group can have many users
users: Mapped[List[UserSQL]] = relationship("UserSQL", back_populates="site_group")
class SiteGroupSiteSQL(Base, CreatedMixin):
"""Class representing the site_group_sites table.
Each site_group_site row specifies a single site in a site group.
"""
__tablename__ = "site_group_sites"
__table_args__ = (UniqueConstraint("site_group_uuid", "site_uuid", name="idx_site_group_site"),)
site_group_site_uuid = sa.Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
site_group_uuid = sa.Column(
UUID(as_uuid=True),
sa.ForeignKey("site_groups.site_group_uuid"),
nullable=False,
comment="The foreign key to the site_groups table",
)
site_uuid = sa.Column(
UUID(as_uuid=True),
sa.ForeignKey("sites.site_uuid"),
nullable=False,
comment="The foreign key to the sites table",
)
class SiteAssetType(enum.Enum):
"""Enum type representing a site's asset type."""
pv = 1
wind = 2
class SiteSQL(Base, CreatedMixin):
"""Class representing the sites table.
Each site row specifies a single panel or cluster of panels
found on a residential house or commercial building. Their
data is provided by a client.
*Approximate size: *
4 clients * ~1000 sites each = ~4000 rows
"""
__tablename__ = "sites"
site_uuid = sa.Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
client_site_id = sa.Column(
sa.Integer, index=True, comment="The ID of the site as given by the providing client"
)
client_site_name = sa.Column(
sa.String(255), index=True, comment="The ID of the site as given by the providing client"
)
country = sa.Column(
sa.String(255), server_default="uk", comment="The country in which the site is located"
)
region = sa.Column(
sa.String(255), comment="The region within the country in which the site is located"
)
dno = sa.Column(sa.String(255), comment="The Distribution Node Operator that owns the site")
gsp = sa.Column(sa.String(255), comment="The Grid Supply Point in which the site is located")
asset_type = sa.Column(
sa.Enum(SiteAssetType, name="site_asset_type"),
nullable=False,
server_default=SiteAssetType.pv.name,
)
# For metadata `NULL` means "we don't know".
orientation = sa.Column(
sa.Float, comment="The rotation of the panel in degrees. 180° points south"
)
tilt = sa.Column(
sa.Float, comment="The tile of the panel in degrees. 90° indicates the panel is vertical"
)
latitude = sa.Column(sa.Float)
longitude = sa.Column(sa.Float)
capacity_kw = sa.Column(
sa.Float, comment="The physical limit on the production capacity of the site"
)
inverter_capacity_kw = sa.Column(sa.Float, comment="The inverter capacity of the site")
module_capacity_kw = sa.Column(sa.Float, comment="The PV module nameplate capacity of the site")
ml_id = sa.Column(
sa.Integer,
autoincrement=True,
nullable=False,
unique=True,
comment="Auto-incrementing integer ID of the site for use in ML training",
)
forecasts: Mapped[List["ForecastSQL"]] = relationship("ForecastSQL", back_populates="site")
generation: Mapped[List["GenerationSQL"]] = relationship("GenerationSQL")
inverters: Mapped[List["InverterSQL"]] = relationship(
"InverterSQL", back_populates="site", cascade="all, delete-orphan"
)
site_groups: Mapped[List["SiteGroupSQL"]] = relationship(
"SiteGroupSQL", secondary="site_group_sites", back_populates="sites"
)
class GenerationSQL(Base, CreatedMixin):
"""Class representing the generation table.
Each generation row specifies a generated power output over a
given time range for a site.
*Approximate size: *
Generation populated every 5 minutes per site * 4000 sites = ~1,125,000 rows per day
"""
__tablename__ = "generation"
__table_args__ = (
UniqueConstraint("site_uuid", "start_utc", "end_utc", name="uniq_cons_site_start_end"),
)
generation_uuid = sa.Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
site_uuid = sa.Column(
UUID(as_uuid=True),
sa.ForeignKey("sites.site_uuid"),
nullable=False,
index=True,
comment="The site for which this geenration yield belongs to",
)
generation_power_kw = sa.Column(
sa.Float,
nullable=False,
comment="The actual generated power in kW at this site for this datetime interval",
)
start_utc = sa.Column(
sa.DateTime,
nullable=False,
index=True,
comment="The start of the time interval over which this generated power value applies",
)
end_utc = sa.Column(
sa.DateTime,
nullable=False,
comment="The end of the time interval over which this generated power value applies",
)
site: Mapped["SiteSQL"] = relationship("SiteSQL", back_populates="generation")
class ForecastSQL(Base, CreatedMixin):
"""Class representing the forecasts table.
Each forecast row refers to a sequence of predicted solar generation values
over a set of target times for one site.
*Approximate size: *
One forecast per site every 5 minutes = ~1,125,000 rows per day
"""
__tablename__ = "forecasts"
forecast_uuid = sa.Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
site_uuid = sa.Column(
UUID(as_uuid=True),
sa.ForeignKey("sites.site_uuid"),
nullable=False,
comment="The site for which the forecast sequence was generated",
)
# The timestamp at which we are making the forecast. This is often referred as "now" in the
# modelling code.
# Note that this could be very different from the `created_utc` time, for instance if we
# run the model for a given "now" timestamp in the past.
timestamp_utc = sa.Column(
sa.DateTime,
nullable=False,
comment="The creation time of the forecast sequence",
)
forecast_version = sa.Column(
sa.String(32),
nullable=False,
comment="The semantic version of the model used to generate the forecast",
)
# one (forecasts) to many (forecast_values)
forecast_values: Mapped["ForecastValueSQL"] = relationship("ForecastValueSQL")
site = relationship("SiteSQL", back_populates="forecasts")
__table_args__ = (
# With this index, we are assuming that it doesn't make sense to do a query solely on
# `timestamp_utc`: we always also filter by site_uuid.
sa.Index("ix_forecasts_site_uuid_timestamp_utc", "site_uuid", "timestamp_utc"),
)
class ForecastValueSQL(Base, CreatedMixin):
"""Class representing the forecast_values table.
Each forecast_value row is a prediction for the power output
of a site over a target datetime interval. Many predictions
are made for each site at each target interval.
*Approximate size: *
One forecast value every 5 minutes per site per forecast.
Each forecast's prediction sequence covers 24 hours of target
intervals = ~324,000,000 rows per day
"""
__tablename__ = "forecast_values"
forecast_value_uuid = sa.Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
start_utc = sa.Column(
sa.DateTime,
nullable=False,
index=True,
comment="The start of the time interval over which this predicted power value applies",
)
end_utc = sa.Column(
sa.DateTime,
nullable=False,
comment="The end of the time interval over which this predicted power value applies",
)
forecast_power_kw = sa.Column(
sa.Float,
nullable=False,
comment="The predicted power generation of this site for the given time interval",
)
# This is the different between `start_utc` and the `forecast`'s `timestamp_utc`, in minutes.
# It's useful to have it in its own column to efficiently query forecasts for a given horizon.
# TODO Set to nullable=False
horizon_minutes = sa.Column(
sa.Integer,
nullable=True,
comment="The time difference between the creation time of the forecast value "
"and the start of the time interval it applies for",
)
forecast_uuid = sa.Column(
UUID(as_uuid=True),
sa.ForeignKey("forecasts.forecast_uuid"),
nullable=False,
comment="The forecast sequence this forcast value belongs to",
)
forecast: Mapped["ForecastSQL"] = relationship("ForecastSQL", back_populates="forecast_values")
__table_args__ = (
# Here we assume that we always filter on `horizon_minutes` *for given forecasts*.
sa.Index(
"ix_forecast_values_forecast_uuid_horizon_minutes", "forecast_uuid", "horizon_minutes"
),
)
class StatusSQL(Base, CreatedMixin):
"""Class representing the status table.
Each status row defines a message reporting on the status of the
services within the nowcasting domain
*Approximate size: *
~1 row per day
"""
__tablename__ = "status"
status_uuid = sa.Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
status = sa.Column(sa.String(255))
message = sa.Column(sa.String(255))
class InverterSQL(Base, CreatedMixin):
"""Class representing the inverters table.
Each InverterSQL row represents an inverter tied to a SiteSQL row.
*Approximate size: *
4 clients * ~1000 sites each * ~1 inverter each = ~4000 rows
"""
__tablename__ = "inverters"
inverter_uuid = sa.Column(UUID(as_uuid=True), default=uuid.uuid4, primary_key=True)
site_uuid = sa.Column(
UUID(as_uuid=True),
sa.ForeignKey("sites.site_uuid"),
nullable=False,
index=True,
comment="The UUID for the site that has this inverter",
)
site: Mapped["SiteSQL"] = relationship("SiteSQL", back_populates="inverters")
class APIRequestSQL(Base, CreatedMixin):
"""Information about what API route was called."""
__tablename__ = "api_request"
uuid = sa.Column(UUID, primary_key=True, server_default=sa.func.gen_random_uuid())
url = sa.Column(sa.String)
user_uuid = sa.Column(UUID, sa.ForeignKey("users.user_uuid"), index=True)
user = relationship("UserSQL", back_populates="api_request")