Skip to content

Commit

Permalink
♻️🗃️ Is1004/modify resource tracker backend after container label cha…
Browse files Browse the repository at this point in the history
…nges (#4488)
  • Loading branch information
matusdrobuliak66 authored Jul 11, 2023
1 parent 241d018 commit a0dd8ad
Show file tree
Hide file tree
Showing 9 changed files with 261 additions and 166 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""refactoring of resource_tracker_container table
Revision ID: ef931143b7cd
Revises: a8762d5d43ae
Create Date: 2023-07-11 14:37:57.455348+00:00
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "ef931143b7cd"
down_revision = "a8762d5d43ae"
branch_labels = None
depends_on = None


def upgrade():
    """Reshape ``resource_tracker_container`` for the new container labels.

    Steps, in order:
      1. create the ``containerclassification`` Postgres enum type,
      2. delete every row of ``resource_tracker_container``,
      3. add the ``cpu_limit``, ``memory_limit`` and ``classification`` columns,
      4. drop the four ``service_settings_*`` reservation/limit columns.
    """
    table_name = "resource_tracker_container"

    # The enum type is created explicitly before any column references it.
    classification_type = postgresql.ENUM(
        "DYNAMIC_SIDECAR", "USER_SERVICE", name="containerclassification"
    )
    classification_type.create(op.get_bind())

    # Presumably the table is emptied because the NOT NULL columns added below
    # carry no default that existing rows could use.
    op.execute("DELETE FROM resource_tracker_container;")

    # NOTE(review): Numeric(3, 2) tops out at 9.99 -- confirm that no container
    # can be given a CPU limit of 10 or more.
    added_columns = (
        sa.Column("cpu_limit", sa.Numeric(precision=3, scale=2), nullable=False),
        sa.Column("memory_limit", sa.BigInteger(), nullable=False),
        sa.Column(
            "classification",
            sa.Enum("DYNAMIC_SIDECAR", "USER_SERVICE", name="containerclassification"),
            nullable=True,
        ),
    )
    for new_column in added_columns:
        op.add_column(table_name, new_column)

    removed_columns = (
        "service_settings_reservation_nano_cpus",
        "service_settings_limit_nano_cpus",
        "service_settings_limit_memory_bytes",
        "service_settings_reservation_memory_bytes",
    )
    for old_column in removed_columns:
        op.drop_column(table_name, old_column)


def downgrade():
    """Revert ``resource_tracker_container`` to its previous column layout.

    Re-adds the four nullable ``service_settings_*`` BIGINT columns, drops the
    ``classification``, ``memory_limit`` and ``cpu_limit`` columns, and finally
    drops the ``containerclassification`` enum type that ``upgrade`` created.

    Rows deleted and column values dropped by ``upgrade`` are not restored.
    """
    table_name = "resource_tracker_container"

    restored_columns = (
        "service_settings_reservation_memory_bytes",
        "service_settings_limit_memory_bytes",
        "service_settings_limit_nano_cpus",
        "service_settings_reservation_nano_cpus",
    )
    for column_name in restored_columns:
        op.add_column(
            table_name,
            sa.Column(
                column_name,
                sa.BIGINT(),
                autoincrement=False,
                nullable=True,
            ),
        )

    op.drop_column(table_name, "classification")
    op.drop_column(table_name, "memory_limit")
    op.drop_column(table_name, "cpu_limit")

    # Dropping the column does not remove the enum type itself; drop it here so
    # the downgrade leaves no orphaned type behind. This must come after the
    # ``classification`` column is gone, since the column depends on the type.
    container_classification_enum = postgresql.ENUM(
        "DYNAMIC_SIDECAR", "USER_SERVICE", name="containerclassification"
    )
    container_classification_enum.drop(op.get_bind(), checkfirst=True)
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,20 @@
- Table where we store the resource usage of each container that
we scrape via resource-usage-tracker service
"""
import enum

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB

from ._common import column_modified_datetime
from .base import metadata


class ContainerClassification(str, enum.Enum):
    """Our custom classification of a tracked container.

    Each member's value is spelled out and equals its name. With the ``str``
    mixin, ``enum.auto()`` would produce the opaque values ``"1"`` and ``"2"``,
    so a lookup such as ``ContainerClassification("USER_SERVICE")`` would fail
    and serialized values would not be readable.
    """

    DYNAMIC_SIDECAR = "DYNAMIC_SIDECAR"
    USER_SERVICE = "USER_SERVICE"


resource_tracker_container = sa.Table(
"resource_tracker_container",
metadata,
Expand Down Expand Up @@ -39,18 +46,6 @@
doc="product_name label scraped via Prometheus (taken from container labels)",
index=True,
),
sa.Column(
"service_settings_reservation_nano_cpus",
sa.BigInteger,
nullable=True,
doc="CPU resource allocated to a container, ex.500000000 means that the container is allocated 0.5 CPU shares",
),
sa.Column(
"service_settings_reservation_memory_bytes",
sa.BigInteger,
nullable=True,
doc="memory limit in bytes scraped via Prometheus",
),
sa.Column(
"service_settings_reservation_additional_info",
JSONB,
Expand Down Expand Up @@ -90,18 +85,6 @@
nullable=True,
doc="instance label scraped via Prometheus (taken from container labels, ex.: gpu1)",
),
sa.Column(
"service_settings_limit_nano_cpus",
sa.BigInteger,
nullable=True,
doc="CPU resource limit allocated to a container, ex.500000000 means that the container has limit for 0.5 CPU shares",
),
sa.Column(
"service_settings_limit_memory_bytes",
sa.BigInteger,
nullable=True,
doc="memory limit in bytes scraped via Prometheus",
),
sa.Column(
"project_name",
sa.String,
Expand All @@ -126,6 +109,23 @@
nullable=False,
doc="Service Version (parsed from image label scraped via Prometheus)",
),
sa.Column(
"cpu_limit",
sa.Numeric(precision=3, scale=2),
nullable=False,
doc="CPU resource allocated to a container, ex.0.5 CPU shares",
),
sa.Column(
"memory_limit",
sa.BigInteger,
nullable=False,
doc="memory limit in bytes scraped via Prometheus",
),
sa.Column(
"classification",
sa.Enum(ContainerClassification),
doc="Our custom classification of the container type",
),
# ---------------------------
sa.PrimaryKeyConstraint("container_id", name="resource_tracker_container_pkey"),
)
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from models_library.projects_nodes_io import NodeID
from models_library.services import ServiceKey, ServiceVersion
from models_library.users import UserID
from pydantic import BaseModel, Field, PositiveInt
from pydantic import BaseModel, ByteSize, Field, PositiveInt
from simcore_postgres_database.models.resource_tracker import ContainerClassification

# Scraped from prometheus

Expand All @@ -26,17 +27,18 @@ class ContainerScrapedResourceUsageMetric(BaseModel):
None,
description="Instance label scraped via Prometheus (taken from container labels, ex.: gpu1)",
)
service_settings_reservation_nano_cpus: int | None = Field(
None,
description="CPU resource limit allocated to a container, ex.500000000 means that the container has limit for 0.5 CPU shares",
)
service_settings_reservation_memory_bytes: int | None
service_settings_reservation_additional_info: dict[str, Any] = Field(
{},
description="Storing additional information about the reservation settings, such as what type of graphic card is used.",
)
service_settings_limit_nano_cpus: int | None
service_settings_limit_memory_bytes: int | None
memory_limit: ByteSize = Field(
None,
description="Memory bytes limit set by the runtime, ex. 17179869184 means that the container has limit for 16GB of memory",
)
cpu_limit: float = Field(
None,
description="CPU limit set by the runtime, ex. 3.5 Shares of one CPU cores",
)
service_key: ServiceKey
service_version: ServiceVersion

Expand All @@ -50,8 +52,14 @@ class Config:
arbitrary_types_allowed = True


class ContainerScrapedResourceUsageCustom(BaseModel):
    """Model holding our own (custom) classification of a container."""

    # Required field: one of the ``ContainerClassification`` enum members.
    classification: ContainerClassification


class ContainerScrapedResourceUsage(
ContainerScrapedResourceUsageMetric, ContainerScrapedResourceUsageValues
ContainerScrapedResourceUsageMetric,
ContainerScrapedResourceUsageValues,
ContainerScrapedResourceUsageCustom,
):
...

Expand All @@ -60,8 +68,8 @@ class ContainerScrapedResourceUsage(


class ContainerGetDB(BaseModel):
service_settings_reservation_nano_cpus: int | None
service_settings_reservation_memory_bytes: int | None
cpu_limit: float
memory_limit: int
prometheus_created: datetime
prometheus_last_scraped: datetime
project_uuid: ProjectID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ async def upsert_resource_tracker_container_data(
project_uuid=f"{data.project_uuid}",
project_name=data.project_name,
product_name=data.product_name,
service_settings_reservation_nano_cpus=data.service_settings_reservation_nano_cpus,
service_settings_reservation_memory_bytes=data.service_settings_reservation_memory_bytes,
cpu_limit=data.cpu_limit,
memory_limit=data.memory_limit,
service_settings_reservation_additional_info=data.service_settings_reservation_additional_info,
container_cpu_usage_seconds_total=data.container_cpu_usage_seconds_total,
prometheus_created=data.prometheus_created.datetime,
Expand All @@ -49,10 +49,9 @@ async def upsert_resource_tracker_container_data(
node_uuid=f"{data.node_uuid}",
node_label=data.node_label,
instance=data.instance,
service_settings_limit_nano_cpus=data.service_settings_limit_nano_cpus,
service_settings_limit_memory_bytes=data.service_settings_limit_memory_bytes,
service_key=data.service_key,
service_version=data.service_version,
classification=data.classification,
)

on_update_stmt = insert_stmt.on_conflict_do_update(
Expand Down Expand Up @@ -84,8 +83,8 @@ async def list_containers_by_user_and_product(
async with self.db_engine.begin() as conn:
query = (
sa.select(
resource_tracker_container.c.service_settings_reservation_nano_cpus,
resource_tracker_container.c.service_settings_reservation_memory_bytes,
resource_tracker_container.c.cpu_limit,
resource_tracker_container.c.memory_limit,
resource_tracker_container.c.prometheus_created,
resource_tracker_container.c.prometheus_last_scraped,
resource_tracker_container.c.project_uuid,
Expand All @@ -106,7 +105,7 @@ async def list_containers_by_user_and_product(

result = await conn.execute(query)
containers_list = [
ContainerGetDB.construct(**row) # type: ignore[arg-type]
ContainerGetDB(**row) # type: ignore[arg-type]
for row in result.fetchall()
]

Expand Down
Loading

0 comments on commit a0dd8ad

Please sign in to comment.