Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

♻️🗃️ Is1004/modify resource tracker backend after container label changes #4488

Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""refactoring of resource_tracker_container table

Revision ID: ef931143b7cd
Revises: a8762d5d43ae
Create Date: 2023-07-11 14:37:57.455348+00:00

"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "ef931143b7cd"
down_revision = "a8762d5d43ae"
branch_labels = None
depends_on = None


def upgrade():
    """Apply the ``resource_tracker_container`` refactoring.

    Steps, in order:

    1. create the ``containerclassification`` PostgreSQL enum type,
    2. delete every row of ``resource_tracker_container`` (two of the new
       columns are ``NOT NULL`` and are added without a server default),
    3. add the ``cpu_limit``, ``memory_limit`` and ``classification`` columns,
    4. drop the four legacy ``service_settings_*`` columns.
    """
    table_name = "resource_tracker_container"
    enum_name = "containerclassification"
    enum_labels = ("DYNAMIC_SIDECAR", "USER_SERVICE")

    # The enum type is created explicitly before any column references it.
    postgresql.ENUM(*enum_labels, name=enum_name).create(op.get_bind())

    # Empty the table so the NOT NULL columns below can be added.
    op.execute("DELETE FROM resource_tracker_container;")

    # NOTE(review): Numeric(3, 2) cannot store values >= 10 -- confirm that no
    # container is ever given 10 or more CPUs.
    columns_to_add = (
        sa.Column("cpu_limit", sa.Numeric(precision=3, scale=2), nullable=False),
        sa.Column("memory_limit", sa.BigInteger(), nullable=False),
        sa.Column(
            "classification",
            sa.Enum(*enum_labels, name=enum_name),
            nullable=True,
        ),
    )
    for new_column in columns_to_add:
        op.add_column(table_name, new_column)

    columns_to_drop = (
        "service_settings_reservation_nano_cpus",
        "service_settings_limit_nano_cpus",
        "service_settings_limit_memory_bytes",
        "service_settings_reservation_memory_bytes",
    )
    for legacy_column in columns_to_drop:
        op.drop_column(table_name, legacy_column)


def downgrade():
    """Revert the ``resource_tracker_container`` refactoring.

    Re-adds the four legacy ``service_settings_*`` columns as nullable
    ``BIGINT`` columns, drops the columns added by ``upgrade()`` and finally
    drops the ``containerclassification`` enum type. The previous contents of
    the legacy columns are not restored.
    """
    table_name = "resource_tracker_container"

    # Re-create the legacy columns in the order Alembic generated them.
    legacy_columns = (
        "service_settings_reservation_memory_bytes",
        "service_settings_limit_memory_bytes",
        "service_settings_limit_nano_cpus",
        "service_settings_reservation_nano_cpus",
    )
    for legacy_column in legacy_columns:
        op.add_column(
            table_name,
            sa.Column(
                legacy_column,
                sa.BIGINT(),
                autoincrement=False,
                nullable=True,
            ),
        )

    for added_column in ("classification", "memory_limit", "cpu_limit"):
        op.drop_column(table_name, added_column)

    # upgrade() creates the enum type explicitly and dropping the column does
    # not remove it, so drop it here once its only user is gone. Otherwise the
    # downgrade would leave an orphaned type behind in the schema.
    postgresql.ENUM(name="containerclassification").drop(
        op.get_bind(), checkfirst=True
    )
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,20 @@
- Table where we store the resource usage of each container that
we scrape via resource-usage-tracker service
"""
import enum

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB

from ._common import column_modified_datetime
from .base import metadata


class ContainerClassification(str, enum.Enum):
    """Custom classification of a scraped container.

    Each member's value equals its name. ``enum.auto()`` is deliberately not
    used: combined with the ``str`` mixin it would produce the values ``"1"``
    and ``"2"``, so a lookup by label such as
    ``ContainerClassification("USER_SERVICE")`` would fail.
    """

    DYNAMIC_SIDECAR = "DYNAMIC_SIDECAR"
    USER_SERVICE = "USER_SERVICE"


resource_tracker_container = sa.Table(
"resource_tracker_container",
metadata,
Expand Down Expand Up @@ -39,18 +46,6 @@
doc="product_name label scraped via Prometheus (taken from container labels)",
index=True,
),
sa.Column(
"service_settings_reservation_nano_cpus",
sa.BigInteger,
nullable=True,
doc="CPU resource allocated to a container, ex.500000000 means that the container is allocated 0.5 CPU shares",
),
sa.Column(
"service_settings_reservation_memory_bytes",
sa.BigInteger,
nullable=True,
doc="memory limit in bytes scraped via Prometheus",
),
sa.Column(
"service_settings_reservation_additional_info",
JSONB,
Expand Down Expand Up @@ -90,18 +85,6 @@
nullable=True,
doc="instance label scraped via Prometheus (taken from container labels, ex.: gpu1)",
),
sa.Column(
"service_settings_limit_nano_cpus",
sa.BigInteger,
nullable=True,
doc="CPU resource limit allocated to a container, ex.500000000 means that the container has limit for 0.5 CPU shares",
),
sa.Column(
"service_settings_limit_memory_bytes",
sa.BigInteger,
nullable=True,
doc="memory limit in bytes scraped via Prometheus",
),
sa.Column(
"project_name",
sa.String,
Expand All @@ -126,6 +109,23 @@
nullable=False,
doc="Service Version (parsed from image label scraped via Prometheus)",
),
sa.Column(
"cpu_limit",
sa.Numeric(precision=3, scale=2),
nullable=False,
doc="CPU resource allocated to a container, ex.0.5 CPU shares",
),
sa.Column(
"memory_limit",
sa.BigInteger,
nullable=False,
doc="memory limit in bytes scraped via Prometheus",
),
sa.Column(
"classification",
sa.Enum(ContainerClassification),
doc="Our custom classification of the container type",
),
# ---------------------------
sa.PrimaryKeyConstraint("container_id", name="resource_tracker_container_pkey"),
)
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from models_library.projects_nodes_io import NodeID
from models_library.services import ServiceKey, ServiceVersion
from models_library.users import UserID
from pydantic import BaseModel, Field, PositiveInt
from pydantic import BaseModel, ByteSize, Field, PositiveInt
from simcore_postgres_database.models.resource_tracker import ContainerClassification

# Scraped from prometheus

Expand All @@ -26,17 +27,18 @@ class ContainerScrapedResourceUsageMetric(BaseModel):
None,
description="Instance label scraped via Prometheus (taken from container labels, ex.: gpu1)",
)
service_settings_reservation_nano_cpus: int | None = Field(
None,
description="CPU resource limit allocated to a container, ex.500000000 means that the container has limit for 0.5 CPU shares",
)
service_settings_reservation_memory_bytes: int | None
service_settings_reservation_additional_info: dict[str, Any] = Field(
{},
description="Storing additional information about the reservation settings, such as what type of graphic card is used.",
)
service_settings_limit_nano_cpus: int | None
service_settings_limit_memory_bytes: int | None
memory_limit: ByteSize = Field(
None,
description="Memory bytes limit set by the runtime, ex. 17179869184 means that the container has limit for 16GB of memory",
)
cpu_limit: float = Field(
None,
description="CPU limit set by the runtime, ex. 3.5 Shares of one CPU cores",
)
service_key: ServiceKey
service_version: ServiceVersion

Expand All @@ -50,8 +52,14 @@ class Config:
arbitrary_types_allowed = True


# Model carrying the single ``classification`` field; it is one of the base
# classes combined into ``ContainerScrapedResourceUsage``.
class ContainerScrapedResourceUsageCustom(BaseModel):
    # One of the ``ContainerClassification`` enum members.
    classification: ContainerClassification


class ContainerScrapedResourceUsage(
ContainerScrapedResourceUsageMetric, ContainerScrapedResourceUsageValues
ContainerScrapedResourceUsageMetric,
ContainerScrapedResourceUsageValues,
ContainerScrapedResourceUsageCustom,
):
...

Expand All @@ -60,8 +68,8 @@ class ContainerScrapedResourceUsage(


class ContainerGetDB(BaseModel):
service_settings_reservation_nano_cpus: int | None
service_settings_reservation_memory_bytes: int | None
cpu_limit: float
memory_limit: int
prometheus_created: datetime
prometheus_last_scraped: datetime
project_uuid: ProjectID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ async def upsert_resource_tracker_container_data(
project_uuid=f"{data.project_uuid}",
project_name=data.project_name,
product_name=data.product_name,
service_settings_reservation_nano_cpus=data.service_settings_reservation_nano_cpus,
service_settings_reservation_memory_bytes=data.service_settings_reservation_memory_bytes,
cpu_limit=data.cpu_limit,
memory_limit=data.memory_limit,
service_settings_reservation_additional_info=data.service_settings_reservation_additional_info,
container_cpu_usage_seconds_total=data.container_cpu_usage_seconds_total,
prometheus_created=data.prometheus_created.datetime,
Expand All @@ -49,10 +49,9 @@ async def upsert_resource_tracker_container_data(
node_uuid=f"{data.node_uuid}",
node_label=data.node_label,
instance=data.instance,
service_settings_limit_nano_cpus=data.service_settings_limit_nano_cpus,
service_settings_limit_memory_bytes=data.service_settings_limit_memory_bytes,
service_key=data.service_key,
service_version=data.service_version,
classification=data.classification,
)

on_update_stmt = insert_stmt.on_conflict_do_update(
Expand Down Expand Up @@ -84,8 +83,8 @@ async def list_containers_by_user_and_product(
async with self.db_engine.begin() as conn:
query = (
sa.select(
resource_tracker_container.c.service_settings_reservation_nano_cpus,
resource_tracker_container.c.service_settings_reservation_memory_bytes,
resource_tracker_container.c.cpu_limit,
resource_tracker_container.c.memory_limit,
resource_tracker_container.c.prometheus_created,
resource_tracker_container.c.prometheus_last_scraped,
resource_tracker_container.c.project_uuid,
Expand All @@ -106,7 +105,7 @@ async def list_containers_by_user_and_product(

result = await conn.execute(query)
containers_list = [
ContainerGetDB.construct(**row) # type: ignore[arg-type]
ContainerGetDB(**row) # type: ignore[arg-type]
for row in result.fetchall()
]

Expand Down
Loading