diff --git a/.env.example b/.env.example index 7a18eaa..f634647 100644 --- a/.env.example +++ b/.env.example @@ -1,12 +1,27 @@ # Database -DATABASE_URL=postgres://postgres:postgres@localhost:5433/fipe +# Required. Generate a strong, URL-safe value, e.g. `openssl rand -hex 32` (it is embedded verbatim in DATABASE_URL, so avoid characters such as "/", "+", "@" and ":"). +POSTGRES_PASSWORD= +# Optional overrides — defaults: user=postgres, db=fipe. If you change these or DB_PORT, update DATABASE_URL below to match. +POSTGRES_USER=postgres +POSTGRES_DB=fipe +# Host port (postgres is bound to 127.0.0.1 only; not reachable from the network). +DB_PORT=5433 +DATABASE_URL=postgres://postgres:${POSTGRES_PASSWORD}@localhost:5433/fipe # Crawler settings -RATE_LIMIT_MS=800 # Min delay between requests (ms) -MAX_THROTTLE_MS=5000 # Max delay when rate limited (ms) +# Min delay between requests (ms) +RATE_LIMIT_MS=800 +# Max delay when rate limited (ms) +MAX_THROTTLE_MS=5000 MAX_RETRIES=3 # AI Classification (optional) # Required for automatic segment classification of vehicle models # Get your key at: https://console.anthropic.com/ ANTHROPIC_API_KEY= + +# Docker service settings +# Command the crawler container runs +CRAWLER_COMMAND=bun run crawl +# Docker restart policy: no | always | on-failure | unless-stopped +RESTART_POLICY=always diff --git a/Dockerfile.migrate b/Dockerfile.migrate new file mode 100644 index 0000000..54e202f --- /dev/null +++ b/Dockerfile.migrate @@ -0,0 +1,7 @@ +FROM oven/bun:alpine +WORKDIR /app +COPY package.json bun.lock ./ +RUN bun install --frozen-lockfile +COPY src ./src +COPY tsconfig.json drizzle.config.ts ./ +CMD ["tail", "-f", "/dev/null"] diff --git a/README.md b/README.md index 0d5eff9..3273f7e 100644 --- a/README.md +++ b/README.md @@ -23,12 +23,21 @@ ## Início Rápido +**Tudo via Docker** — sobe o banco, aplica o schema e inicia o crawler automaticamente (defina `POSTGRES_PASSWORD` no `.env` antes de subir; o Compose falha se a variável estiver vazia): + +```bash +cp .env.example .env +docker compose up --build +``` + +**Desenvolvimento local:** + ```bash cp .env.example .env -docker compose up -d # PostgreSQL -bun install # dependências -bun run db:push # schema -bun run crawl # crawl +docker compose up 
-d postgres # Só o banco +bun install # dependências +bun run db:push # schema +bun run crawl # crawl ``` ## Comandos @@ -67,22 +76,52 @@ bun run classify -- --dry-run # preview da classificação ## Docker +O `docker compose up` sobe três serviços em ordem: + +| Serviço | Imagem | O que faz | +|---|---|---| +| `postgres` | `postgres:16-alpine` | Banco de dados | +| `migrate` | `Dockerfile.migrate` | Aplica o schema (`db:push`) e encerra | +| `crawler` | `Dockerfile` | Executa o crawler e, por padrão, reinicia sempre que termina (veja `RESTART_POLICY`) | + +O crawler só inicia após `migrate` concluir com sucesso. + +**Customizar o comando do crawler:** + ```bash -docker build -t fipe-crawler . -docker run -d --name fipe --env-file .env fipe-crawler +CRAWLER_COMMAND="bun run crawl -- --year 2024" docker compose up --build +``` + +**Controlar o comportamento de restart:** + +```bash +RESTART_POLICY=no docker compose up --build # roda uma vez e para +``` -docker exec fipe bun src/index.ts crawl --brand 25 --year 2024 --month 6 -docker exec fipe bun src/index.ts status +**Executar comandos avulsos no container (o container `fipe-crawler` precisa estar em execução):** + +```bash +docker exec fipe-crawler bun src/index.ts status +docker exec fipe-crawler bun src/index.ts crawl --brand 25 --year 2024 --month 6 ``` ## Configuração ```bash +# Banco (POSTGRES_PASSWORD é obrigatório; veja .env.example) +DB_PORT=5433 DATABASE_URL=postgres://postgres:postgres@localhost:5433/fipe -RATE_LIMIT_MS=800 # Delay mínimo entre requests (ms) -MAX_THROTTLE_MS=5000 # Delay máximo quando rate limited (ms) + +# Crawler +RATE_LIMIT_MS=800 # Delay mínimo entre requests (ms) +MAX_THROTTLE_MS=5000 # Delay máximo quando rate limited (ms) MAX_RETRIES=3 -ANTHROPIC_API_KEY= # Para classificação de segmentos via AI (opcional) + +ANTHROPIC_API_KEY= # Para classificação de segmentos via AI (opcional) + +# Docker Compose +CRAWLER_COMMAND=bun run crawl +RESTART_POLICY=always ``` ## Schema diff --git a/docker-compose.yml b/docker-compose.yml index b7ec277..b4e89bc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,18 +3,48 @@ services: 
image: postgres:16-alpine container_name: fipe-postgres environment: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - POSTGRES_DB: fipe + POSTGRES_USER: ${POSTGRES_USER:-postgres} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set (see .env.example)} + POSTGRES_DB: ${POSTGRES_DB:-fipe} ports: - - '5433:5432' + - '127.0.0.1:${DB_PORT:-5433}:5432' volumes: - postgres_data:/var/lib/postgresql/data healthcheck: - test: ['CMD-SHELL', 'pg_isready -U postgres'] + test: ['CMD-SHELL', 'pg_isready -U ${POSTGRES_USER:-postgres}'] interval: 5s timeout: 5s retries: 5 + migrate: + build: + context: . + dockerfile: Dockerfile.migrate + container_name: fipe-migrate + command: bun run db:push + restart: 'no' + depends_on: + postgres: + condition: service_healthy + environment: + DATABASE_URL: postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set (see .env.example)}@postgres:5432/${POSTGRES_DB:-fipe} + + crawler: + build: . + container_name: fipe-crawler + command: sh -c "${CRAWLER_COMMAND:-bun run crawl}" + restart: "${RESTART_POLICY:-always}" + depends_on: + postgres: + condition: service_healthy + migrate: + condition: service_completed_successfully + environment: + DATABASE_URL: postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set (see .env.example)}@postgres:5432/${POSTGRES_DB:-fipe} + RATE_LIMIT_MS: ${RATE_LIMIT_MS:-800} + MAX_THROTTLE_MS: ${MAX_THROTTLE_MS:-5000} + MAX_RETRIES: ${MAX_RETRIES:-3} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} + volumes: postgres_data: diff --git a/drizzle.config.ts b/drizzle.config.ts index 6405413..180fc0d 100644 --- a/drizzle.config.ts +++ b/drizzle.config.ts @@ -5,6 +5,8 @@ export default defineConfig({ out: './drizzle', dialect: 'postgresql', dbCredentials: { - url: process.env.DATABASE_URL || 'postgres://postgres:postgres@localhost:5432/fipe', + url: + process.env.DATABASE_URL || + `postgres://postgres:postgres@localhost:${process.env.DB_PORT ?? 
5433}/fipe`, }, }); diff --git a/src/config.ts b/src/config.ts index 275069c..f6abb99 100644 --- a/src/config.ts +++ b/src/config.ts @@ -2,6 +2,7 @@ import { z } from 'zod'; const envSchema = z.object({ DATABASE_URL: z.string().min(1), + DB_PORT: z.coerce.number().default(5433), RATE_LIMIT_MS: z.coerce.number().default(800), MAX_THROTTLE_MS: z.coerce.number().default(5000), MAX_RETRIES: z.coerce.number().default(3),