diff --git a/.github/workflows/test-migrations.yml b/.github/workflows/test-migrations.yml
new file mode 100644
index 000000000..d5b815619
--- /dev/null
+++ b/.github/workflows/test-migrations.yml
@@ -0,0 +1,55 @@
+# Strategy:
+# - Load latest API image from ghcr
+# - Startup API and postgres services
+# - Rebuild API container from the checked-out code (entrypoint contains `alembic upgrade head`)
+name: Test database migrations
+
+on:
+  push:
+    branches: ["main"]
+  pull_request:
+    branches: ["main"]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: "pip"
+          cache-dependency-path: pyproject.toml
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Run environment setup script
+        run: bash env.sh
+
+      - name: Start Docker services
+        run: |
+          export TRACECAT__IMAGE_TAG=latest
+          docker compose -f docker-compose.yml up --build --no-deps -d api postgres_db caddy
+
+      - name: Create Docker Compose override for local build
+        run: |
+          cat << EOF > docker-compose.override.yml
+          version: '3'
+          services:
+            api:
+              build:
+                context: .
+                dockerfile: Dockerfile
+          EOF
+
+      # --build forces a build from the checkout; without it Compose may reuse
+      # the image that is already present locally from the step above.
+      - name: Rebuild API to test migrations
+        run: docker compose -f docker-compose.yml -f docker-compose.override.yml up --build -d api
+
+      # The recreated API container may still be starting, so retry the probe.
+      - name: Verify Tracecat API is running
+        run: curl -sf --retry 10 --retry-delay 3 --retry-all-errors http://localhost/api/health | jq -e '.status == "ok"'
diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml
index e4f2487d3..bba7436d9 100644
--- a/.github/workflows/test-python.yml
+++ b/.github/workflows/test-python.yml
@@ -59,7 +59,7 @@ jobs:
         run: docker compose -f docker-compose.dev.yml up --build --no-deps -d api worker postgres_db caddy
 
       - name: Verify Tracecat API is running
-        run: curl -s http://localhost:8000/health | jq -e '.status == "ok"'
+        run: curl -s http://localhost/api/health | jq -e '.status == "ok"'
 
       - name: pip install Tracecat
         run: |
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3c8e7688d..aeffb4470 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -19,7 +19,9 @@ repos:
       - id: ruff
         args:
           - --fix
+        exclude: ^alembic/versions/
       - id: ruff-format
+        exclude: ^alembic/versions/
   - repo: https://github.com/gitleaks/gitleaks
     rev: v8.18.2 # Specify the desired version of Gitleaks
     hooks:
diff --git a/Dockerfile b/Dockerfile
index 38fa36bb7..2ad1e5edf 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -30,17 +30,25 @@ RUN groupadd -g 1001 apiuser && \
 # Set the working directory inside the container
 WORKDIR /app
 
-# Change to the non-root user
-USER apiuser
-
 # Copy the application files into the container and set ownership
 COPY --chown=apiuser:apiuser ./tracecat /app/tracecat
 COPY --chown=apiuser:apiuser ./pyproject.toml /app/pyproject.toml
 COPY --chown=apiuser:apiuser ./README.md /app/README.md
 COPY --chown=apiuser:apiuser ./LICENSE /app/LICENSE
+COPY --chown=apiuser:apiuser ./alembic.ini /app/alembic.ini
+COPY --chown=apiuser:apiuser ./alembic /app/alembic
+
+# Copy the entrypoint script
+COPY --chown=apiuser:apiuser scripts/entrypoint.sh /app/entrypoint.sh
+RUN chmod +x /app/entrypoint.sh
+
+# Change to the non-root user
+USER apiuser
 
 # Install package
 RUN pip install --upgrade pip && pip install .
 
+ENTRYPOINT ["/app/entrypoint.sh"]
+
 # Command to run the application
 CMD ["sh", "-c", "python3 -m uvicorn tracecat.api.app:app --host $HOST --port $PORT"]
diff --git a/alembic.ini b/alembic.ini
new file mode 100644
index 000000000..1f431ea68
--- /dev/null
+++ b/alembic.ini
@@ -0,0 +1,122 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts
+# Use forward slashes (/) also on windows to provide an os agnostic path
+script_location = alembic
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.
+prepend_sys_path = .
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the python>=3.9 or backports.zoneinfo library.
+# Any required deps can installed by adding `alembic[tz]` to the pip requirements
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to alembic/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "version_path_separator" below.
+# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
+
+# version path separator; As mentioned above, this is the character used to split
+# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+# NOTE: in the Alembic env.py file, we get the URI directly from env vars
+# sqlalchemy.url = %(TRACECAT__DB_URI)s
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts.  See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = %(here)s/.venv/bin/ruff
+# ruff.options = --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic,alembic_utils
+
+[logger_alembic_utils]
+level = INFO
+handlers =
+qualname = alembic_utils
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/alembic/README b/alembic/README
new file mode 100644
index 000000000..2500aa1bc
--- /dev/null
+++ b/alembic/README
@@ -0,0 +1 @@
+Generic single-database configuration.
diff --git a/alembic/env.py b/alembic/env.py
new file mode 100644
index 000000000..733adbf66
--- /dev/null
+++ b/alembic/env.py
@@ -0,0 +1,65 @@
+import os
+from logging.config import fileConfig
+
+import alembic_postgresql_enum  # noqa: F401
+from sqlalchemy import engine_from_config, pool
+from sqlmodel import SQLModel
+
+from alembic import context
+from tracecat.db import schemas  # noqa: F401
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Interpret the config file for Python logging.
+# This line sets up loggers basically.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# add your model's MetaData object here
+# for 'autogenerate' support
+# from myapp import mymodel
+# target_metadata = mymodel.Base.metadata
+target_metadata = SQLModel.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode."""
+    url = os.environ["TRACECAT__DB_URI"]
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode."""
+    connectable = engine_from_config(
+        configuration=config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+        url=os.environ["TRACECAT__DB_URI"],
+    )
+
+    with connectable.connect() as connection:
+        context.configure(connection=connection, target_metadata=target_metadata)
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/alembic/script.py.mako b/alembic/script.py.mako
new file mode 100644
index 000000000..fbc4b07dc
--- /dev/null
+++ b/alembic/script.py.mako
@@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
diff --git a/alembic/versions/ebf7613506e0_initial_migration.py b/alembic/versions/ebf7613506e0_initial_migration.py
new file mode 100644
index 000000000..91b033385
--- /dev/null
+++ b/alembic/versions/ebf7613506e0_initial_migration.py
@@ -0,0 +1,30 @@
+"""Initial migration
+
+Revision ID: ebf7613506e0
+Revises:
+Create Date: 2024-08-18 23:29:03.639693
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = 'ebf7613506e0'
+down_revision: Union[str, None] = None
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###
diff --git a/pyproject.toml b/pyproject.toml
index 4234c21b5..85f8acfb1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,10 @@ dependencies = [
     "adbc-driver-snowflake==1.0.0",
     "adbc-driver-sqlite==1.0.0",
     "aioboto3==13.0.1",
+    "alembic_utils==0.8.4",
+    "alembic-postgresql-enum==1.3.0",
+    "alembic==1.13.2",
+    "asyncpg==0.29.0",
     "authlib>=1.3.1,<1.4.0",
     "boto3==1.34.70",
     "cloudpickle==3.0.0",
@@ -33,8 +37,8 @@ dependencies = [
     "croniter==2.0.5",
     "crowdstrike-falconpy==1.4.4",
     "cryptography==42.0.7",
-    "fastapi==0.111.0",
     "fastapi-users[sqlalchemy,oauth]==13.0.0",
+    "fastapi==0.111.0",
     "fsspec==2024.6.0",
     "greenlet==3.0.3",
     "httpx==0.27.0",
@@ -46,11 +50,9 @@ dependencies = [
     "orjson==3.10.3",
     "polars-lts-cpu==1.2.0",
     "psycopg[binary]==3.1.19",
-    "psycopg2-binary==2.9.9",
-    "asyncpg==0.29.0",
     "pyarrow==16.1.0",
     "pydantic==2.6.1",
-    "python-slugify",
+    "python-slugify==8.0.4",
     "shodan==1.31.0",
     "slack-sdk==3.28.0",
     "sqlmodel==0.0.18",
@@ -117,7 +119,8 @@ log_level = "INFO"
 log_cli = true
 log_cli_level = "INFO"
 markers = [
-    "webtest: marks test that require the web",
+    "webtest: marks tests that require the web",
     "slow: marks tests as slow",
+    "dbtest: marks tests that make database calls",
     "disable_fixture: marks tests that disable fixtures",
 ]
diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
new file mode 100644
index 000000000..d8874d33b
--- /dev/null
+++ b/scripts/entrypoint.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Function to run migrations
+run_migrations() {
+    echo "Running database migrations..."
+    if ! alembic upgrade head; then
+        echo "Migration failed!"
+        return 1
+    fi
+    echo "Migrations completed successfully."
+}
+
+# Check if we need to run migrations (only for API)
+if [[ "${RUN_MIGRATIONS:-false}" == "true" ]]; then
+    if ! run_migrations; then
+        echo "Exiting due to migration failure"
+        exit 1
+    fi
+fi
+
+# Execute the CMD
+exec "$@"