Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# Database
DATABASE_URL=postgres://postgres:postgres@localhost:5433/fipe
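# Required. Generate a strong, URL-safe value, e.g. `openssl rand -hex 32`.
# The password is embedded as-is in DATABASE_URL, so avoid characters such as / + = @ :
# (which `openssl rand -base64` can produce) unless you percent-encode them.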
POSTGRES_PASSWORD=
# Optional overrides — defaults: user=postgres, db=fipe.
POSTGRES_USER=postgres
POSTGRES_DB=fipe
# Host port (postgres is bound to 127.0.0.1 only; not reachable from the network).
DB_PORT=5433
DATABASE_URL=postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@localhost:${DB_PORT}/${POSTGRES_DB}

# Crawler settings
RATE_LIMIT_MS=800 # Min delay between requests (ms)
MAX_THROTTLE_MS=5000 # Max delay when rate limited (ms)
# Min delay between requests (ms)
RATE_LIMIT_MS=800
# Max delay when rate limited (ms)
MAX_THROTTLE_MS=5000
MAX_RETRIES=3

# AI Classification (optional)
# Only needed for automatic segment classification of vehicle models
# Get your key at: https://console.anthropic.com/
ANTHROPIC_API_KEY=

# Docker service settings
# Command the crawler container runs
CRAWLER_COMMAND=bun run crawl
# Docker restart policy: no | always | on-failure | unless-stopped
RESTART_POLICY=always
7 changes: 7 additions & 0 deletions Dockerfile.migrate
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Image for the one-shot schema migration job (`bun run db:push`).
FROM oven/bun:alpine
WORKDIR /app
# Copy the manifests first so the dependency layer stays cached when only sources change.
COPY package.json bun.lock ./
RUN bun install --frozen-lockfile
COPY src ./src
COPY tsconfig.json drizzle.config.ts ./
# Default to applying the schema and exiting, so the image behaves the same
# with or without the `command:` override in docker-compose.yml instead of
# idling forever on `tail -f /dev/null`.
CMD ["bun", "run", "db:push"]
61 changes: 50 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,21 @@

## Início Rápido

**Tudo via Docker** — sobe o banco, aplica o schema e inicia o crawler automaticamente:

```bash
cp .env.example .env            # edite o .env e defina POSTGRES_PASSWORD (obrigatório)
docker compose up --build
```

**Desenvolvimento local:**

```bash
cp .env.example .env            # defina POSTGRES_PASSWORD no .env (obrigatório)
docker compose up -d # PostgreSQL
bun install # dependências
bun run db:push # schema
bun run crawl # crawl
docker compose up -d postgres # Só o banco
bun install # dependências
bun run db:push # schema
bun run crawl # crawl
```

## Comandos
Expand Down Expand Up @@ -67,22 +76,52 @@ bun run classify -- --dry-run # preview da classificação

## Docker

O `docker compose up` sobe três serviços em ordem:

| Serviço | Imagem | O que faz |
|---|---|---|
| `postgres` | `postgres:16-alpine` | Banco de dados |
| `migrate` | `Dockerfile.migrate` | Aplica o schema (`db:push`) e encerra |
| `crawler` | `Dockerfile` | Executa o crawler e reinicia sempre que termina |

O crawler só inicia após `migrate` concluir com sucesso.

**Customizar o comando do crawler:**

```bash
docker build -t fipe-crawler .
docker run -d --name fipe --env-file .env fipe-crawler
CRAWLER_COMMAND="bun run crawl -- --year 2024" docker compose up --build
```

**Controlar o comportamento de restart:**

```bash
RESTART_POLICY=no docker compose up --build # roda uma vez e para
```

docker exec fipe bun src/index.ts crawl --brand 25 --year 2024 --month 6
docker exec fipe bun src/index.ts status
**Executar comandos avulsos no container:**

```bash
docker exec fipe-crawler bun src/index.ts status
docker exec fipe-crawler bun src/index.ts crawl --brand 25 --year 2024 --month 6
```

## Configuração

```bash
# Banco
DB_PORT=5433
POSTGRES_PASSWORD=              # Obrigatório — o compose não sobe sem ela
DATABASE_URL=postgres://postgres:${POSTGRES_PASSWORD}@localhost:5433/fipe
RATE_LIMIT_MS=800 # Delay mínimo entre requests (ms)
MAX_THROTTLE_MS=5000 # Delay máximo quando rate limited (ms)

# Crawler
RATE_LIMIT_MS=800 # Delay mínimo entre requests (ms)
MAX_THROTTLE_MS=5000 # Delay máximo quando rate limited (ms)
MAX_RETRIES=3
ANTHROPIC_API_KEY= # Para classificação de segmentos via AI (opcional)

ANTHROPIC_API_KEY= # Para classificação de segmentos via AI (opcional)

# Docker Compose
CRAWLER_COMMAND=bun run crawl
RESTART_POLICY=always
```

## Schema
Expand Down
40 changes: 35 additions & 5 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,48 @@ services:
image: postgres:16-alpine
container_name: fipe-postgres
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: fipe
POSTGRES_USER: ${POSTGRES_USER:-postgres}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set (see .env.example)}
POSTGRES_DB: ${POSTGRES_DB:-fipe}
ports:
- '5433:5432'
- '127.0.0.1:${DB_PORT:-5433}:5432'
volumes:
- postgres_data:/var/lib/postgresql/data
healthcheck:
test: ['CMD-SHELL', 'pg_isready -U postgres']
test: ['CMD-SHELL', 'pg_isready -U ${POSTGRES_USER:-postgres}']
interval: 5s
timeout: 5s
retries: 5

migrate:
  # One-shot job: runs `bun run db:push` once postgres is healthy and is
  # never restarted.
  build:
    context: .
    dockerfile: Dockerfile.migrate
  container_name: fipe-migrate
  command: bun run db:push
  restart: 'no'
  depends_on:
    postgres:
      condition: service_healthy
  environment:
    # Use the same required-variable form as the postgres service: fail fast
    # when POSTGRES_PASSWORD is missing instead of falling back to the
    # well-known password "postgres".
    DATABASE_URL: postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set (see .env.example)}@postgres:5432/${POSTGRES_DB:-fipe}

crawler:
  # Runs CRAWLER_COMMAND (default: `bun run crawl`) through `sh -c`.
  build: .
  container_name: fipe-crawler
  command: sh -c "${CRAWLER_COMMAND:-bun run crawl}"
  # Restart policy comes from RESTART_POLICY (default: always).
  restart: "${RESTART_POLICY:-always}"
  depends_on:
    postgres:
      condition: service_healthy
    # Start only after the migrate job has exited successfully.
    migrate:
      condition: service_completed_successfully
  environment:
    # Use the same required-variable form as the postgres service: fail fast
    # when POSTGRES_PASSWORD is missing instead of falling back to the
    # well-known password "postgres".
    DATABASE_URL: postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set (see .env.example)}@postgres:5432/${POSTGRES_DB:-fipe}
    RATE_LIMIT_MS: ${RATE_LIMIT_MS:-800}
    MAX_THROTTLE_MS: ${MAX_THROTTLE_MS:-5000}
    MAX_RETRIES: ${MAX_RETRIES:-3}
    ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}

volumes:
postgres_data:
4 changes: 3 additions & 1 deletion drizzle.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ export default defineConfig({
out: './drizzle',
dialect: 'postgresql',
dbCredentials: {
url: process.env.DATABASE_URL || 'postgres://postgres:postgres@localhost:5432/fipe',
url:
process.env.DATABASE_URL ||
`postgres://postgres:postgres@localhost:${process.env.DB_PORT ?? 5433}/fipe`,
},
});
1 change: 1 addition & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { z } from 'zod';

const envSchema = z.object({
DATABASE_URL: z.string().min(1),
DB_PORT: z.coerce.number().default(5433),
RATE_LIMIT_MS: z.coerce.number().default(800),
MAX_THROTTLE_MS: z.coerce.number().default(5000),
MAX_RETRIES: z.coerce.number().default(3),
Expand Down