diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..afd1d22 --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ +# Arquivos temporários +*.pyc +__pycache__/ +*.py[cod] +*$py.class + +# Jupyter Notebook +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# Ambientes virtuais +.venv/ +venv/ +ENV/ +env/ + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db +desktop.ini + +# Credenciais (NUNCA fazer commit) +*secret*.json +*credentials*.json +*cookie*.txt +*api*.txt +*.pem + +# Arquivos de processamento +VIRALS/ +*.mp4 +*.mp3 +*.wav + +# Scripts temporários +add_docs.py diff --git a/LICENSE b/LICENSE index f288702..8c5bfeb 100644 --- a/LICENSE +++ b/LICENSE @@ -671,4 +671,4 @@ into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read -. +.) diff --git a/README.md b/README.md index f2d23d2..e29e4f6 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,172 @@ +# 🎬 ViralCutter - Smooth Face Tracking Edition + +[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/masterface77/ViralCutter/blob/smooth-zoom/ViralCutter-SmoothZoom.ipynb) +[![Open in Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://www.kaggle.com/code/levireis77/viralcutter-cyclic-smooth-zoom-edition-kag) +[![Discord](https://dcbadge.limes.pink/api/server/tAdPHFAbud)](https://discord.gg/tAdPHFAbud) + +> **🎯 Branch `smooth-zoom`** - Versão com **YOLO Smooth Tracking** + face tracking configurável! + +Fork do [ViralCutter](https://github.com/RafaelGodoyEbert/ViralCutter) com **Smooth Face Tracking** - a câmera segue o rosto de forma suave e cinematográfica. + +--- + +## ✨ Novidades v0.9 - Smooth Face Tracking + +![Smooth Tracking](https://img.shields.io/badge/🎥-Smooth_Tracking-blueviolet?style=for-the-badge) + +### 🎥 Tracking Suave Configurável +A suavidade do tracking agora pode ser ajustada no Gradio! + +| Alpha | Efeito | +|-------|--------| +| 0.02 | Ultra Suave (câmera bem lenta) | +| 0.05 | Normal (padrão recomendado) | +| 0.10 | Rápido (acompanha mais de perto) | + +### ⚙️ Configuração via Interface +- **"Advanced Face Settings"** no Gradio +- **Slider "Tracking Smoothness"** para ajustar alpha +- Aplica-se apenas ao modo YOLO + +**Tecnologias:** +- 🔍 **YOLOv8** - Detecção e tracking de pessoas em tempo real +- 📊 **ByteTrack** - IDs persistentes para cada pessoa +- 📈 **EMA (Exponential Moving Average)** - Suavização configurável + +### ⚡ Otimização T4 (WhisperX) +Transcrição otimizada para GPUs T4 do Colab/Kaggle (16GB VRAM): + +| Configuração | Valor | Benefício | +|--------------|-------|-----------| +| `compute_type` | int8_float16 | 50% menos VRAM | +| `model` | large-v2 | Mais estável que v3 | +| `batch_size` | 8 | Evita OOM em vídeos longos | +| `language` | pt (padrão) | Pula detecção automática | +| `chunk_size` | 15 | Chunks maiores = mais eficiente | + +--- + +## 🚀 Notebooks Disponíveis + +### 🔵 Colab - Smooth Zoom +**Características:** +- ✅ YOLO Smooth Zoom **ATIVADO POR PADRÃO** +- ✅ Upload automático para Google Drive +- ✅ Otimizado para GPUs T4 +- ✅ Instalação rápida (3-5 min) + +**Como usar:** +1. Abra o notebook [ViralCutter-SmoothZoom.ipynb](https://colab.research.google.com/github/masterface77/ViralCutter/blob/smooth-zoom/ViralCutter-SmoothZoom.ipynb) +2. Execute a célula principal +3. Aguarde a instalação (inclui `ultralytics`) +4. Clique no link `gradio.live` gerado +5. Face Model já vem selecionado como **yolo** 🎯 + +### 🟠 Kaggle - Smooth Zoom +**Características:** +- ✅ 30h/semana de GPU grátis +- ✅ Upload OAuth para sua conta Drive +- ✅ YOLO Smooth Zoom incluído +- ✅ Suporte a cookies e datasets + +--- + +## 📦 Configurando Datasets no Kaggle + +O notebook do Kaggle requer algumas credenciais. Siga o guia completo: + +### 1️⃣ client_secret.json (Obrigatório) +Credenciais OAuth do Google Cloud para upload no Drive. + +**Passos:** +1. Acesse [Google Cloud Console](https://console.cloud.google.com/) +2. Crie um projeto ou use um existente +3. Vá em **APIs & Services** → **Credentials** +4. **Create Credentials** → **OAuth 2.0 Client ID** +5. Escolha **Desktop App** +6. Baixe o JSON +7. No Kaggle: **Add Data** → **Upload** → Faça upload +8. Nomeie o dataset como `client-secret-json` + +### 2️⃣ cookie (Opcional) +Cookies para download de vídeos privados/restritos. + +**Passos:** +1. Instale [Get cookies.txt LOCAL](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) +2. Acesse www.kaggle.com (logado) +3. Clique na extensão → **Export** +4. Salve como `www.youtube.com_cookies.txt` +5. No Kaggle: **Add Data** → **Upload** +6. Nomeie o dataset como `cookie` + +### 3️⃣ credenciais-google (Obrigatório) +API Key do Gemini para análise com IA. + +**Passos:** +1. Acesse [Google AI Studio](https://makersuite.google.com/app/apikey) +2. **Create API Key** +3. Copie a chave +4. Crie arquivo `gemini_api.txt` com a chave +5. No Kaggle: **Add Data** → **Upload** +6. Nomeie o dataset como `credenciais-google` + +### 4️⃣ google-drive-credentials (Opcional) +Token OAuth reutilizável (gerado na primeira execução). + +**Como reutilizar:** +1. Execute o notebook uma vez +2. Após autenticação, baixe o arquivo `.json` gerado em `/kaggle/working/` +3. Crie dataset no Kaggle com este arquivo +4. Nomeie como `google-drive-credentials` + +--- + +## ✨ Diferenças entre Colab e Kaggle + +| Característica | Colab | Kaggle | +|----------------|-------|--------| +| GPU Grátis | ✅ 12h/dia | ✅ 30h/semana | +| Configuração | Mais simples | Requer datasets | +| Upload Drive | Nativo | OAuth manual | +| Zoom IA | ❌ Removido | ✅ Disponível | +| Persistência | ❌ Nenhuma | ✅ Datasets | + +--- + +## 🎯 Recursos + +- **Detecção automática** de momentos virais +- **Transcrição com IA** (WhisperX) +- **Corte inteligente** com análise semântica +- **Legendas automáticas** +- **Processamento em batch** + +--- + +## 🔗 Links Úteis + +- **Licença (GPL v3):** [LICENSE](LICENSE) +- **Repositório Original:** [RafaelGodoyEbert/ViralCutter](https://github.com/RafaelGodoyEbert/ViralCutter) +- **Discord (Suporte):** [discord.gg/tAdPHFAbud](https://discord.gg/tAdPHFAbud) + +--- + +## 📝 Créditos + +Desenvolvido por **Rafa.Godoy** +- [GitHub](https://github.com/rafaelGodoyEbert) +- [Twitter](https://twitter.com/GodoyEbert) +- [Instagram](https://www.instagram.com/rafael.godoy.ebert/) + +Fork customizado para facilitar uso em Kaggle e Colab. + +--- + +## 📄 Licença + +Este projeto é licenciado sob a **GNU General Public License v3**, permitindo que você copie, distribua e modifique o software livremente, desde que mantenha a mesma licença. [Leia a licença completa aqui](LICENSE). + + # ViralCutter [![Discord](https://dcbadge.limes.pink/api/server/tAdPHFAbud)](https://discord.gg/tAdPHFAbud)
diff --git a/README_en.md b/README_en.md index 8d092ab..8ebd015 100644 --- a/README_en.md +++ b/README_en.md @@ -102,5 +102,9 @@ ViralCutter is community-maintained. Join us to democratize AI content creation! - **Discord**: [AI Hub Brasil](https://discord.gg/aihubbrasil) - **Github**: Give us a ⭐ star if this project helped you! +## 📄 License + +This project is licensed under the **GNU General Public License v3**. [Read the full license here](LICENSE). + **Current Version**: 0.8v Alpha *ViralCutter: Because viral clips shouldn't cost a fortune.* 🚀 diff --git a/ViralCutter-Colab.ipynb b/ViralCutter-Colab.ipynb new file mode 100644 index 0000000..868eb50 --- /dev/null +++ b/ViralCutter-Colab.ipynb @@ -0,0 +1,216 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pa36OeArowme" + }, + "source": [ + "# ViralCutter\n", + "Uma alternativa gratuita ao `opus.pro` e ao `vidyo.ai`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6Q-ljfsw1unE" + }, + "source": [ + "# Suporte em:\n", + "[![](https://dcbadge.limes.pink/api/server/tAdPHFAbud)](https://discord.gg/tAdPHFAbud)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e76jiRnjONmj", + "outputId": "a2a5eeb8-04de-42f2-834e-8ad1c4d04393" + }, + "outputs": [], + "source": [ + "#@title 🎬 VIRALCUTTER COLAB (VRAM Fix + Drive Sync)\n", + "#@markdown Este script roda o ViralCutter com otimização de memória e sincroniza os vídeos processados com seu Google Drive.\n", + "\n", + "import os\n", + "import shutil\n", + "import subprocess\n", + "import threading\n", + "import time\n", + "import sys\n", + "from IPython.display import clear_output\n", + "from google.colab import drive\n", + "\n", + "# ==========================================\n", + "# ⚙️ CONFIGURAÇÕES (EDITE AQUI)\n", + "# ==========================================\n", + "PASTA_DRIVE_FINAL = \"/content/drive/MyDrive/ViralCutter_Prontos\" # Onde salvar\n", + "\n", + "# ==========================================\n", + "# 1. MONTAGEM DO DRIVE (NATIVO COLAB)\n", + "# ==========================================\n", + "print(\"1️⃣ Conectando ao Google Drive...\")\n", + "if not os.path.exists('/content/drive'):\n", + " drive.mount('/content/drive')\n", + "\n", + "os.makedirs(PASTA_DRIVE_FINAL, exist_ok=True)\n", + "print(f\"✅ Drive conectado! Vídeos irão para: {PASTA_DRIVE_FINAL}\")\n", + "\n", + "# ==========================================\n", + "# 2. INSTALAÇÃO OTIMIZADA (VRAM FIX)\n", + "# ==========================================\n", + "# Verifica se já está instalado para economizar tempo\n", + "if not os.path.exists(\"/content/ViralCutter\"):\n", + " print(\"\\n📦 Instalando ViralCutter Otimizado (3-5 min)...\")\n", + "\n", + " # Clone\n", + " subprocess.run(\"git clone https://github.com/RafaelGodoyEbert/ViralCutter.git /content/ViralCutter\", shell=True)\n", + " os.chdir(\"/content/ViralCutter\")\n", + "\n", + " # Instalação do gerenciador rápido (UV)\n", + " subprocess.run(\"pip install uv -q\", shell=True)\n", + "\n", + " # Drivers de Sistema (FFmpeg e CUDA basics)\n", + " subprocess.run(\"apt-get update -y -qq && apt-get install -y libcudnn8 ffmpeg xvfb -qq\", shell=True)\n", + "\n", + " # Cria ambiente\n", + " subprocess.run([\"uv\", \"venv\", \".venv\"], check=True)\n", + "\n", + " # --- OTIMIZAÇÃO DE VRAM AQUI ---\n", + " # Instalamos o faster-whisper e forçamos versões específicas do Torch\n", + " # para evitar que o Colab carregue CUDA kernels duplicados.\n", + " print(\" 🚀 Otimizando dependências de Vídeo e IA...\")\n", + "\n", + " cmds = [\n", + " # Motor de transcrição leve\n", + " \"uv pip install --python .venv faster-whisper\",\n", + " # Bibliotecas base\n", + " \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n", + " \"uv pip install --python .venv -r requirements.txt\",\n", + " \"uv pip install --python .venv -U --pre 'yt-dlp[default]'\",\n", + " # Gemini e Processamento\n", + " \"uv pip install --python .venv google-generativeai pandas onnxruntime-gpu\",\n", + " # Correção de versão do Transformers (Evita erro de alinhamento)\n", + " \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\",\n", + " # Torch Otimizado para T4 (Colab)\n", + " \"uv pip install --python .venv torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121\",\n", + " # Visão Computacional (MediaPipe leve)\n", + " \"uv pip install --python .venv insightface\",\n", + " \"uv pip uninstall --python .venv mediapipe protobuf flatbuffers\",\n", + " \"uv pip install --python .venv 'mediapipe>=0.10.0' 'protobuf>=3.20,<5.0' 'flatbuffers>=2.0'\"\n", + " ]\n", + "\n", + " for cmd in cmds:\n", + " subprocess.run(cmd, shell=True, check=True)\n", + "\n", + " print(\"✅ Instalação Concluída!\")\n", + "else:\n", + " os.chdir(\"/content/ViralCutter\")\n", + " print(\"\\n✅ Sistema já instalado. Pulando etapa.\")\n", + "\n", + "# ==========================================\n", + "# 3. MONITOR AUTOMÁTICO (ENGINE)\n", + "# ==========================================\n", + "def engine_loop():\n", + " print(\"👀 Engine: Monitorando pasta VIRALS...\")\n", + " processed = set()\n", + " WATCH_DIR = \"/content/ViralCutter/VIRALS\"\n", + "\n", + " while True:\n", + " if os.path.exists(WATCH_DIR):\n", + " for r, d, f in os.walk(WATCH_DIR):\n", + " for file in f:\n", + " if not file.endswith(\".mp4\"): continue\n", + " # Ignora arquivos temporários e originais\n", + " if any(x in file for x in [\"input\", \"temp\", \"original\"]): continue\n", + "\n", + " full_path = os.path.join(r, file)\n", + "\n", + " # Verifica se o arquivo terminou de ser salvo\n", + " if full_path not in processed and os.path.getsize(full_path) > 1e6:\n", + " s1 = os.path.getsize(full_path)\n", + " time.sleep(5)\n", + " if os.path.getsize(full_path) != s1: continue\n", + "\n", + " print(f\"\\n💎 NOVO VÍDEO DETECTADO: {file}\")\n", + "\n", + " # Envia para o Drive\n", + " print(f\" ☁️ Salvando no Drive: {PASTA_DRIVE_FINAL}...\")\n", + " shutil.copy(full_path, os.path.join(PASTA_DRIVE_FINAL, os.path.basename(full_path)))\n", + " print(\" ✅ Concluído!\")\n", + "\n", + " processed.add(full_path)\n", + "\n", + " time.sleep(5)\n", + "\n", + "# Inicia o monitor em segundo plano\n", + "threading.Thread(target=engine_loop, daemon=True).start()\n", + "\n", + "# ==========================================\n", + "# 4. INICIAR INTERFACE\n", + "# ==========================================\n", + "print(\"\\n🚀 INICIANDO INTERFACE...\")\n", + "print(\"⚠️ CLIQUE NO LINK PÚBLICO (gradio.live) ABAIXO:\")\n", + "print(\"=\"*60)\n", + "\n", + "# Configuração de Display Virtual (Necessário para Colab)\n", + "subprocess.Popen(['Xvfb', ':1', '-screen', '0', '2560x1440x8'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n", + "time.sleep(2)\n", + "os.environ.update({'DISPLAY':':1.0', 'MPLBACKEND':'Agg', 'CUDA_VISIBLE_DEVICES':'0'})\n", + "\n", + "# Roda o App\n", + "!/content/ViralCutter/.venv/bin/python webui/app.py --colab" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nEIsMZLwJ5kD" + }, + "source": [ + "#Créditos\n", + "\n", + "Inspirado no [reels clips automator](https://github.com/eddieoz/reels-clips-automator) e no [YoutubeVideoToAIPoweredShorts](https://github.com/Fitsbit/YoutubeVideoToAIPoweredShorts)
\n", + "\n", + "---\n", + "![Rafa.png](https://i.imgur.com/cGknQpU.png;base64)\n", + "\n", + "Desenvolvido por **Rafa.Godoy**
\n", + "[ ![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white) ](https://github.com/rafaelGodoyEbert)
\n", + "[ ![X](https://img.shields.io/twitter/url?url=https%3A%2F%2Ftwitter.com%2FGodoyEbert) ](https://twitter.com/GodoyEbert)
\n", + "[Instagram](https://www.instagram.com/rafael.godoy.ebert/)
\n", + "[ ![](https://dcbadge.vercel.app/api/server/aihubbrasil) ](https://discord.gg/aihubbrasil)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tpLJmPqGT5_u" + }, + "source": [ + "`0.8v Alpha`
\n", + "\n", + "Apenas uma alternativa gratuita ao `opus.pro` e ao `vidyo.ai`
\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/ViralCutter-SmoothZoom.ipynb b/ViralCutter-SmoothZoom.ipynb new file mode 100644 index 0000000..36bc6cb --- /dev/null +++ b/ViralCutter-SmoothZoom.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "header_smooth_zoom" + }, + "source": [ + "# 🎬 ViralCutter - Cyclic Smooth Zoom Edition\n", + "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/masterface77/ViralCutter/blob/smooth-zoom/ViralCutter-SmoothZoom.ipynb)\n", + "\n", + "Versão especial com **YOLO Tracking + Cyclic Smooth Zoom** automático!\n", + "\n", + "### ✨ Novidades v0.9\n", + "- 🔄 **Zoom Cíclico**: zoom in (3s) → hold (2s) → **SNAP BACK** (instantâneo) → repete\n", + "- 🎥 **Tracking Ultra Suave**: alpha 0.02 (câmera segue o rosto bem devagar)\n", + "- 🎯 **Efeito Dinâmico**: aproximação lenta e retorno imediato\n", + "- ⚡ **T4 Otimizado**: WhisperX com int8 quantization (50% menos VRAM)\n", + "\n", + "### 🚀 Recursos\n", + "- ✅ Download de vídeos do YouTube\n", + "- ✅ Cortes automáticos com IA (Gemini)\n", + "- ✅ Legendas dinâmicas\n", + "- ✅ Upload automático para Google Drive\n", + "- ✅ Transcrição otimizada para GPUs T4\n", + "\n", + "---\n", + "[![Discord](https://dcbadge.limes.pink/api/server/tAdPHFAbud)](https://discord.gg/tAdPHFAbud)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "main_cell_smooth_zoom" + }, + "outputs": [], + "source": [ + "#@title 🚀 INICIAR VIRALCUTTER (CYCLIC SMOOTH ZOOM + T4 OTIMIZADO)\n", + "#@markdown Execute esta célula para instalar e iniciar a interface.\n", + "#@markdown\n", + "#@markdown ### ⚙️ Configurações do Zoom Cíclico\n", + "#@markdown - **Zoom In**: 3 segundos (aproxima suavemente no rosto)\n", + "#@markdown - **Hold**: 2 segundos (mantém close-up)\n", + "#@markdown - **SNAP BACK**: (instantâneo para visão ampla)\n", + "#@markdown - **Hold**: 2 segundos | **Repete** até o fim\n", + "#@markdown\n", + "#@markdown ### ⚡ Otimização T4 (WhisperX)\n", + "#@markdown - Modelo: `large-v2` (quantizado)\n", + "#@markdown - Compute type: `int8_float16` (50% menos VRAM)\n", + "#@markdown - Batch size: 8 (evita OOM em vídeos longos)\n", + "#@markdown - Idioma padrão: Português\n", + "\n", + "import os\n", + "import shutil\n", + "import subprocess\n", + "import threading\n", + "import time\n", + "import sys\n", + "from IPython.display import clear_output\n", + "from google.colab import drive\n", + "\n", + "# ==========================================\n", + "# ⚙️ CONFIGURAÇÕES\n", + "# ==========================================\n", + "PASTA_DRIVE_FINAL = \"/content/drive/MyDrive/ViralCutter_SmoothZoom\"\n", + "REPO_URL = \"https://github.com/masterface77/ViralCutter.git\"\n", + "BRANCH = \"smooth-zoom\" # Branch com Cyclic Smooth Zoom + T4 Optimization\n", + "\n", + "# ==========================================\n", + "# 1. MONTAGEM DO DRIVE\n", + "# ==========================================\n", + "print(\"1️⃣ Conectando ao Google Drive...\")\n", + "if not os.path.exists('/content/drive'):\n", + " drive.mount('/content/drive')\n", + "\n", + "os.makedirs(PASTA_DRIVE_FINAL, exist_ok=True)\n", + "print(f\"✅ Drive conectado! Vídeos irão para: {PASTA_DRIVE_FINAL}\")\n", + "\n", + "# ==========================================\n", + "# 2. INSTALAÇÃO (BRANCH SMOOTH-ZOOM)\n", + "# ==========================================\n", + "if not os.path.exists(\"/content/ViralCutter\"):\n", + " print(f\"\\n📦 Clonando ViralCutter (branch: {BRANCH})...\")\n", + "\n", + " # Clone da branch smooth-zoom diretamente\n", + " subprocess.run(f\"git clone -b {BRANCH} {REPO_URL} /content/ViralCutter\", shell=True)\n", + " os.chdir(\"/content/ViralCutter\")\n", + "\n", + " # UV (gerenciador rápido)\n", + " subprocess.run(\"pip install uv -q\", shell=True)\n", + "\n", + " # Drivers de Sistema\n", + " subprocess.run(\"apt-get update -y -qq && apt-get install -y libcudnn8 ffmpeg xvfb -qq\", shell=True)\n", + "\n", + " # Cria ambiente virtual\n", + " subprocess.run([\"uv\", \"venv\", \".venv\"], check=True)\n", + "\n", + " # Instalação das dependências (T4 Optimized)\n", + " print(\" 🚀 Instalando dependências + Ultralytics (T4 Optimizado)...\")\n", + " cmds = [\n", + " \"uv pip install --python .venv faster-whisper\",\n", + " \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n", + " \"uv pip install --python .venv -r requirements.txt\",\n", + " \"uv pip install --python .venv -U --pre 'yt-dlp[default]'\",\n", + " \"uv pip install --python .venv google-generativeai pandas onnxruntime-gpu\",\n", + " \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\",\n", + " \"uv pip install --python .venv torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121\",\n", + " \"uv pip install --python .venv insightface\",\n", + " # YOLO Cyclic Smooth Zoom\n", + " \"uv pip install --python .venv ultralytics\",\n", + " \"uv pip uninstall --python .venv mediapipe protobuf flatbuffers\",\n", + " \"uv pip install --python .venv 'mediapipe>=0.10.0' 'protobuf>=3.20,<5.0' 'flatbuffers>=2.0'\"\n", + " ]\n", + "\n", + " for cmd in cmds:\n", + " subprocess.run(cmd, shell=True, check=True)\n", + "\n", + " print(\"✅ Instalação Concluída!\")\n", + " print(\"🔄 Cyclic Smooth Zoom + T4 Optimization ativados!\")\n", + "else:\n", + " os.chdir(\"/content/ViralCutter\")\n", + " print(\"\\n✅ Sistema já instalado.\")\n", + " # Atualizar para última versão\n", + " subprocess.run(\"git pull origin smooth-zoom\", shell=True)\n", + " print(\"🔄 Atualizado para última versão!\")\n", + "\n", + "# ==========================================\n", + "# 3. MONITOR DE ARQUIVOS (DRIVE SYNC)\n", + "# APENAS arquivos _subtitled (COM LEGENDA)\n", + "# ==========================================\n", + "def engine_loop():\n", + " print(\"👀 Engine: Monitorando pasta VIRALS...\")\n", + " print(\"📌 APENAS arquivos COM LEGENDA (_subtitled) serão salvos!\")\n", + " processed = set()\n", + " WATCH_DIR = \"/content/ViralCutter/VIRALS\"\n", + "\n", + " while True:\n", + " try:\n", + " if os.path.exists(WATCH_DIR):\n", + " for r, d, f in os.walk(WATCH_DIR):\n", + " for file in f:\n", + " if not file.endswith(\".mp4\"): continue\n", + " \n", + " # Ignora arquivos temporários\n", + " if any(x in file.lower() for x in [\"input\", \"temp\", \"original\", \".part\", \"processing\"]): continue\n", + " \n", + " # ✅ APENAS faz upload de arquivos COM LEGENDA (_subtitled)\n", + " if \"_subtitled\" not in file.lower(): continue\n", + "\n", + " full_path = os.path.join(r, file)\n", + "\n", + " if full_path not in processed:\n", + " try:\n", + " if os.path.getsize(full_path) > 1e6:\n", + " s1 = os.path.getsize(full_path)\n", + " time.sleep(5)\n", + " if not os.path.exists(full_path): continue\n", + " if os.path.getsize(full_path) != s1: continue\n", + "\n", + " print(f\"\\n💎 Corte COM LEGENDA: {file}\")\n", + " print(f\" ☁️ Enviando para Drive...\")\n", + " shutil.copy(full_path, os.path.join(PASTA_DRIVE_FINAL, os.path.basename(full_path)))\n", + " print(\" ✅ Salvo no Drive!\")\n", + "\n", + " processed.add(full_path)\n", + " except FileNotFoundError:\n", + " continue\n", + " except Exception as e:\n", + " pass\n", + "\n", + " time.sleep(5)\n", + "\n", + "threading.Thread(target=engine_loop, daemon=True).start()\n", + "\n", + "# ==========================================\n", + "# 4. INICIAR INTERFACE GRADIO\n", + "# ==========================================\n", + "print(\"\\n🚀 INICIANDO VIRALCUTTER...\")\n", + "print(\"⚡ T4 Otimizado: int8_float16, batch_size=8, large-v2\")\n", + "print(\"⚠️ CLIQUE NO LINK gradio.live ABAIXO:\")\n", + "print(\"=\"*60)\n", + "\n", + "# Display Virtual\n", + "subprocess.Popen(['Xvfb', ':1', '-screen', '0', '2560x1440x8'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n", + "time.sleep(2)\n", + "os.environ.update({'DISPLAY':':1.0', 'MPLBACKEND':'Agg', 'CUDA_VISIBLE_DEVICES':'0', 'VIRALCUTTER_FACE_MODEL': 'yolo'})\n", + "\n", + "# Roda o App com YOLO como padrão\n", + "!/content/ViralCutter/.venv/bin/python webui/app.py --colab --face-model yolo" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "credits_smooth_zoom" + }, + "source": [ + "## 📝 Créditos\n", + "\n", + "**ViralCutter** por [Rafa.Godoy](https://github.com/rafaelGodoyEbert)\n", + "\n", + "**Cyclic Smooth Zoom** implementado com:\n", + "- [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics)\n", + "- ByteTrack para tracking persistente\n", + "- EMA (Exponential Moving Average) com alpha=0.02\n", + "\n", + "**Otimização T4:**\n", + "- WhisperX com int8 quantization\n", + "- Modelo large-v2 (mais estável)\n", + "- Batch size otimizado para 16GB VRAM\n", + "\n", + "### 🔄 Zoom Cíclico | ⚡ T4 Otimizado\n", + "```\n", + "Zoom: 1.0x ──(3s)──► 1.4x ──(2s hold)──► SNAP ──(2s hold)──► repete\n", + "```\n", + "\n", + "---\n", + "`v0.9 Alpha + Cyclic Smooth Zoom + T4 Optimization`" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/main_improved.py b/main_improved.py index 17451a2..974ed8e 100644 --- a/main_improved.py +++ b/main_improved.py @@ -118,7 +118,7 @@ def main(): parser.add_argument("--project-path", help="Path to existing project folder (overrides URL/Latest)") parser.add_argument("--workflow", choices=["1", "2", "3"], default="1", help="Workflow choice: 1=Full, 2=Cut Only, 3=Subtitles Only") - parser.add_argument("--face-model", choices=["insightface", "mediapipe"], default="insightface", help="Face detection model") + parser.add_argument("--face-model", choices=["yolo", "insightface", "mediapipe"], default="insightface", help="Face detection model: 'yolo' (Smooth Zoom), 'insightface' (default), 'mediapipe'") parser.add_argument("--face-mode", choices=["auto", "1", "2"], default="auto", help="Face tracking mode: auto, 1, 2") parser.add_argument("--subtitle-config", help="Path to subtitle configuration JSON file") parser.add_argument("--no-face-mode", choices=["padding", "zoom"], default="padding", help="Method to handle segments with no face detected: 'padding' (9:16 frame with black bars) or 'zoom' (Center Crop Zoom)") @@ -127,6 +127,7 @@ def main(): parser.add_argument("--face-two-threshold", type=float, default=0.60, help="Relative area threshold to trigger 2-face mode (default: 0.60)") parser.add_argument("--face-confidence-threshold", type=float, default=0.30, help="Face detection confidence threshold (0.0 - 1.0) (default: 0.30)") parser.add_argument("--face-dead-zone", type=str, default="40", help="Camera movement dead zone in pixels (default: 40)") # str to support future "auto" + parser.add_argument("--tracking-alpha", type=float, default=0.05, help="Camera tracking smoothness (0.02=Ultra Smooth, 0.05=Normal, 0.10=Fast)") parser.add_argument("--focus-active-speaker", action="store_true", help="Enable experimental active speaker focus (InsightFace only)") parser.add_argument("--active-speaker-mar", type=float, default=0.03, help="Mouth Aspect Ratio threshold for active speaker (0.0 - 1.0) (default: 0.03)") parser.add_argument("--active-speaker-score-diff", type=float, default=1.5, help="Score difference to focus on active speaker (default: 1.5)") @@ -569,6 +570,7 @@ def main(): two_face_threshold=args.face_two_threshold, confidence_threshold=args.face_confidence_threshold, dead_zone=dead_zone_val, + tracking_alpha=args.tracking_alpha, focus_active_speaker=args.focus_active_speaker, active_speaker_mar=args.active_speaker_mar, active_speaker_score_diff=args.active_speaker_score_diff, diff --git a/requirements.txt b/requirements.txt index 0e221bd..ee34a9d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ whisperx mediapipe google-genai insightface +ultralytics onnxruntime-gpu gradio opencv-python diff --git a/scripts/edit_video.py b/scripts/edit_video.py index ebefb2b..ee00f39 100644 --- a/scripts/edit_video.py +++ b/scripts/edit_video.py @@ -12,6 +12,15 @@ INSIGHTFACE_AVAILABLE = False print("InsightFace not found or error importing. Install with: pip install insightface onnxruntime-gpu") +# YOLO Tracking (Smooth Zoom) +try: + from scripts.face_tracking_yolo import init_yolo, generate_short_yolo, is_yolo_available + YOLO_TRACKING_AVAILABLE = True +except ImportError: + YOLO_TRACKING_AVAILABLE = False + print("YOLO Tracking not available. Install with: pip install ultralytics") + + # Global cache for encoder CACHED_ENCODER = None @@ -1084,7 +1093,7 @@ def sort_score(f): return "1" -def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0, segments_data=None, no_face_mode="padding"): +def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, tracking_alpha=0.05, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0, segments_data=None, no_face_mode="padding"): # Lazy init solutions only when needed to avoid AttributeError if import failed partially mp_face_detection = None mp_face_mesh = None @@ -1099,10 +1108,24 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec # Priority: User Choice -> Fallbacks + # NEW: YOLO Tracking (Smooth Zoom) - highest priority if selected + yolo_working = False + if YOLO_TRACKING_AVAILABLE and face_model == "yolo": + try: + print("Initializing YOLO Tracking (Smooth Zoom)...") + if init_yolo(): + yolo_working = True + print("YOLO Tracking Initialized Successfully!") + else: + print("WARNING: YOLO init returned False. Will try InsightFace.") + except Exception as e: + print(f"WARNING: YOLO Initialization Failed ({e}). Will try InsightFace.") + yolo_working = False + insightface_working = False - # Only init InsightFace if selected or default - if INSIGHTFACE_AVAILABLE and (face_model == "insightface"): + # Only init InsightFace if selected or default (and YOLO not working) + if INSIGHTFACE_AVAILABLE and (face_model == "insightface" or (face_model == "yolo" and not yolo_working)): try: print("Initializing InsightFace...") init_insightface() @@ -1112,6 +1135,7 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec print(f"WARNING: InsightFace Initialization Failed ({e}). Will try MediaPipe.") insightface_working = False + mediapipe_working = False use_haar = False @@ -1176,8 +1200,25 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec success = False detected_mode = "1" # Default if detection fails or fallback + # 0. Try YOLO (Smooth Zoom) - NEW + if yolo_working and not success: + try: + print(f"[YOLO Smooth Zoom] Processing: {input_filename}") + res = generate_short_yolo(input_file, output_file, index, + project_folder, final_folder, + face_mode=face_mode, + no_face_mode=no_face_mode, + alpha=tracking_alpha) + if res: detected_mode = res + success = True + except Exception as e: + import traceback + traceback.print_exc() + print(f"YOLO processing failed for {input_filename}: {e}") + print("Falling back to InsightFace...") + # 1. Try InsightFace - if insightface_working: + if insightface_working and not success: try: # Capture returned mode res = generate_short_insightface(input_file, output_file, index, project_folder, final_folder, face_mode=face_mode, detection_period=detection_period, @@ -1194,6 +1235,7 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec traceback.print_exc() print(f"InsightFace processing failed for {input_filename}: {e}") print("Falling back to MediaPipe/Haar...") + # 2. Try MediaPipe if InsightFace failed or not available if not success and mediapipe_working: diff --git a/scripts/face_tracking_yolo.py b/scripts/face_tracking_yolo.py new file mode 100644 index 0000000..c997a14 --- /dev/null +++ b/scripts/face_tracking_yolo.py @@ -0,0 +1,351 @@ +# Face Tracking with Ultralytics YOLO and Smooth Zoom +""" +This module provides YOLO-based face detection and tracking with EMA smoothing +for a "cinematic" camera follow effect. + +Features: +- Uses YOLOv8 tracking with ByteTrack for persistent face IDs +- EMA smoothing for smooth camera movement +- Automatic GPU detection (CUDA) +- Fallback to InsightFace or center crop if YOLO fails +""" + +import cv2 +import numpy as np +import os +import subprocess + +# Lazy import to avoid errors if ultralytics is not installed +YOLO_AVAILABLE = False +YOLO_MODEL = None + +def init_yolo(model_name="yolov8n.pt"): + """ + Initialize YOLO model for tracking. + Tries yolov8n-face.pt first, then falls back to yolov8n.pt + """ + global YOLO_AVAILABLE, YOLO_MODEL + + try: + from ultralytics import YOLO + import torch + + # Determine device + device = 'cuda' if torch.cuda.is_available() else 'cpu' + print(f"[YOLO] Using device: {device}") + + # Try to find face-specific model first + models_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models") + face_model_path = os.path.join(models_dir, "yolov8n-face.pt") + + if os.path.exists(face_model_path): + print(f"[YOLO] Loading face model: {face_model_path}") + YOLO_MODEL = YOLO(face_model_path) + else: + print(f"[YOLO] Face model not found. Using default: {model_name}") + YOLO_MODEL = YOLO(model_name) + + YOLO_MODEL.to(device) + YOLO_AVAILABLE = True + print("[YOLO] Initialization successful!") + return True + + except ImportError as e: + print(f"[YOLO] ultralytics not installed: {e}") + YOLO_AVAILABLE = False + return False + except Exception as e: + print(f"[YOLO] Initialization failed: {e}") + YOLO_AVAILABLE = False + return False + + +class SmoothBBox: + """ + Exponential Moving Average (EMA) smoothing for bounding boxes. + Provides smooth face tracking without zoom effects. + """ + + def __init__(self, alpha=0.05): + """ + Args: + alpha: Smoothing factor (0.0 = no movement, 1.0 = instant snap) + 0.05 = smooth but responsive tracking + """ + self.alpha = alpha + self.smooth_bbox = None + self.target_bbox = None + self.frames_without_detection = 0 + self.max_frames_hold = 90 + + def update(self, detected_bbox): + """ + Update the smooth bounding box with a new detection. + + Args: + detected_bbox: (x1, y1, x2, y2) or None if no detection + + Returns: + Tuple of (smoothed_bbox, zoom) - zoom is always 1.0 (no zoom) + """ + if detected_bbox is not None: + self.target_bbox = np.array(detected_bbox, dtype=float) + self.frames_without_detection = 0 + + if self.smooth_bbox is None: + # First detection - snap to it + self.smooth_bbox = self.target_bbox.copy() + else: + # Apply EMA smoothing + self.smooth_bbox = ( + self.alpha * self.target_bbox + + (1 - self.alpha) * self.smooth_bbox + ) + else: + # No detection - hold position + self.frames_without_detection += 1 + + if self.frames_without_detection > self.max_frames_hold: + return None, 1.0 + + if self.smooth_bbox is not None: + return tuple(self.smooth_bbox.astype(int)), 1.0 + return None, 1.0 + + def reset(self): + """Reset the smoother state.""" + self.smooth_bbox = None + self.target_bbox = None + self.frames_without_detection = 0 + + +def get_best_encoder(): + """Detect best available video encoder.""" + try: + result = subprocess.run(['ffmpeg', '-hide_banner', '-encoders'], + capture_output=True, text=True) + output = result.stdout + + if "h264_nvenc" in output: + return ("h264_nvenc", "fast") + if "h264_amf" in output: + return ("h264_amf", "speed") + if "h264_qsv" in output: + return ("h264_qsv", "veryfast") + if "h264_videotoolbox" in output: + return ("h264_videotoolbox", "default") + except Exception: + pass + + return ("libx264", "ultrafast") + + +def crop_to_vertical(frame, center_x, center_y, frame_width, frame_height, zoom=1.0): + """ + Crop frame to 9:16 aspect ratio centered on (center_x, center_y) with progressive zoom. + + Args: + frame: Input frame + center_x, center_y: Center point to focus on + frame_width, frame_height: Original frame dimensions + zoom: Zoom factor (1.0 = no zoom, 1.4 = 40% closer) + """ + target_aspect = 9 / 16 + + # Calculate base crop dimensions + if frame_width / frame_height > target_aspect: + base_crop_width = int(frame_height * target_aspect) + base_crop_height = frame_height + else: + base_crop_width = frame_width + base_crop_height = int(frame_width / target_aspect) + + # Apply zoom - smaller crop = more zoomed in + crop_width = int(base_crop_width / zoom) + crop_height = int(base_crop_height / zoom) + + # Ensure minimum crop size (avoid too much zoom) + min_crop_width = int(base_crop_width / 2.0) # Max 2x zoom + min_crop_height = int(base_crop_height / 2.0) + crop_width = max(crop_width, min_crop_width) + crop_height = max(crop_height, min_crop_height) + + # Calculate crop position (centered on face) + crop_x = int(center_x - crop_width // 2) + crop_y = int(center_y - crop_height // 2) + + # Clamp to frame bounds + crop_x = max(0, min(crop_x, frame_width - crop_width)) + crop_y = max(0, min(crop_y, frame_height - crop_height)) + + # Extract and resize + crop = frame[crop_y:crop_y+crop_height, crop_x:crop_x+crop_width] + return cv2.resize(crop, (1080, 1920), interpolation=cv2.INTER_AREA) + + +def generate_short_yolo(input_file, output_file, index, project_folder, final_folder, + face_mode="auto", no_face_mode="zoom", alpha=0.05): + """ + Process video with YOLO tracking and smooth face following. + + Args: + input_file: Path to input video + output_file: Path for temporary output + index: Segment index + project_folder: Project folder path + final_folder: Final output folder + face_mode: "auto", "1", or "2" + no_face_mode: "zoom" or "padding" when no face detected + alpha: EMA smoothing factor (0.02=Ultra Smooth, 0.05=Normal, 0.10=Fast) + """ + global YOLO_MODEL + + if not YOLO_AVAILABLE or YOLO_MODEL is None: + raise RuntimeError("YOLO not initialized. Call init_yolo() first.") + + print(f"[YOLO] Processing with smooth tracking (alpha={alpha}): {input_file}") + + cap = cv2.VideoCapture(input_file) + if not cap.isOpened(): + raise IOError(f"Cannot open video: {input_file}") + + fps = cap.get(cv2.CAP_PROP_FPS) + frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + # Initialize smoother with alpha from UI + smoother = SmoothBBox(alpha=alpha) + + # Video writer + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + out = cv2.VideoWriter(output_file, fourcc, fps, (1080, 1920)) + + # Track the dominant person (by ID persistence or size) + tracked_id = None + + frame_idx = 0 + while True: + ret, frame = cap.read() + if not ret: + break + + # Run YOLO tracking + # persist=True maintains tracking IDs across frames + results = YOLO_MODEL.track(frame, persist=True, conf=0.3, iou=0.5, + verbose=False, classes=[0]) # class 0 = person + + # Extract best detection + best_bbox = None + + if results and len(results) > 0 and results[0].boxes is not None: + boxes = results[0].boxes + + if len(boxes) > 0: + # Get boxes data + xyxy = boxes.xyxy.cpu().numpy() # (x1, y1, x2, y2) + confs = boxes.conf.cpu().numpy() + ids = boxes.id.cpu().numpy() if boxes.id is not None else None + + # Strategy: Track the same person if possible, else pick largest + if tracked_id is not None and ids is not None: + # Try to find our tracked person + match_idx = np.where(ids == tracked_id)[0] + if len(match_idx) > 0: + best_bbox = xyxy[match_idx[0]] + + if best_bbox is None: + # Pick largest (by area) + areas = (xyxy[:, 2] - xyxy[:, 0]) * (xyxy[:, 3] - xyxy[:, 1]) + best_idx = np.argmax(areas) + best_bbox = xyxy[best_idx] + + # Remember this person's ID for tracking + if ids is not None: + tracked_id = ids[best_idx] + + # Apply EMA smoothing with progressive zoom + smoothed, current_zoom = smoother.update(best_bbox) + + if smoothed is not None: + # Calculate face center + x1, y1, x2, y2 = smoothed + center_x = (x1 + x2) / 2 + center_y = (y1 + y2) / 2 + + # Crop and resize with progressive zoom + result = crop_to_vertical(frame, center_x, center_y, + frame_width, frame_height, zoom=current_zoom) + else: + # Fallback: center crop or padding (still use progressive zoom) + if no_face_mode == "zoom": + # Center crop with current zoom level + result = crop_to_vertical(frame, frame_width/2, frame_height/2, + frame_width, frame_height, zoom=current_zoom) + else: + # Padding (import from one_face) + from scripts.one_face import resize_with_padding + result = resize_with_padding(frame) + + out.write(result) + frame_idx += 1 + + # Progress indicator every 100 frames + if frame_idx % 100 == 0: + print(f"[YOLO] Progress: {frame_idx}/{total_frames} frames") + + cap.release() + out.release() + + print(f"[YOLO] Processing complete: {frame_idx} frames") + + # Finalize (mux audio) + _finalize_video(input_file, output_file, index, fps, project_folder, final_folder) + + return "1" # Return face mode for compatibility + + +def _finalize_video(input_file, output_file, index, fps, project_folder, final_folder): + """Mux audio with processed video.""" + audio_file = os.path.join(project_folder, "cuts", f"output-audio-{index}.aac") + + # Extract audio + subprocess.run([ + "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", + "-i", input_file, "-vn", "-acodec", "copy", audio_file + ], check=False, capture_output=True) + + if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0: + final_output = os.path.join(final_folder, f"final-output{str(index).zfill(3)}_processed.mp4") + encoder_name, encoder_preset = get_best_encoder() + + command = [ + "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats", + "-i", output_file, + "-i", audio_file, + "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "5M", + "-c:a", "aac", "-b:a", "192k", + "-r", str(fps), + final_output + ] + + try: + subprocess.run(command, check=True) + print(f"[YOLO] Final output: {final_output}") + + # Cleanup temp files + try: + os.remove(audio_file) + os.remove(output_file) + except: + pass + + except subprocess.CalledProcessError as e: + print(f"[YOLO] Muxing error: {e}") + else: + print(f"[YOLO] Warning: No audio extracted for {input_file}") + + +# Convenience function to check if YOLO is ready +def is_yolo_available(): + return YOLO_AVAILABLE and YOLO_MODEL is not None diff --git a/scripts/transcribe_video.py b/scripts/transcribe_video.py index f219119..976d660 100644 --- a/scripts/transcribe_video.py +++ b/scripts/transcribe_video.py @@ -165,7 +165,7 @@ def vtt_time_to_seconds(t_str): return None return segments -def transcribe(input_file, model_name='large-v3', project_folder='tmp'): +def transcribe(input_file, model_name='large-v2', project_folder='tmp', language=None): print(i18n(f"Iniciando transcrição de {input_file}...")) # Diagnóstico de Ambiente @@ -192,10 +192,14 @@ def transcribe(input_file, model_name='large-v3', project_folder='tmp'): print(f"Os arquivos SRT, TSV e JSON já existem. Pulando a transcrição.") return srt_file, tsv_file - # Device Setup + # Device Setup - Optimized for T4 GPUs (Colab/Kaggle) device = "cuda" if torch.cuda.is_available() else "cpu" print(f"DEBUG: Usando dispositivo: {device}") - compute_type = "float16" if device == "cuda" else "float32" + + # Use int8_float16 for T4 GPUs (16GB VRAM) - better memory efficiency + # int8 quantization reduces VRAM usage by ~50% with minimal quality loss + compute_type = "int8_float16" if device == "cuda" else "float32" + print(f"DEBUG: Compute type: {compute_type} (T4 optimizado)") try: apply_safe_globals_hack() @@ -216,8 +220,10 @@ def transcribe(input_file, model_name='large-v3', project_folder='tmp'): start_segments = None alignment_only = False - # Default blind guess if we have no info - detected_language = "en" + # Use forced language if provided, otherwise detect + detected_language = language if language else "pt" # Default to Portuguese + language_forced = language is not None + print(f"DEBUG: Idioma {'forçado' if language_forced else 'padrão'}: {detected_language}") if potential_subs: sub_path = potential_subs[0] @@ -249,20 +255,26 @@ def transcribe(input_file, model_name='large-v3', project_folder='tmp'): # Mas o align recebe segments como lista. pass else: - # 3. Transcrever (Caminho Normal) + # 3. Transcrever (Caminho Normal) - Optimized for T4 GPUs print("Nenhuma legenda válida encontrada. Realizando transcrição completa (WhisperX)...") - print(f"Carregando modelo {model_name}...") + print(f"Carregando modelo {model_name} (compute_type={compute_type})...") + + # Batch size 8 for T4 (16GB VRAM) - prevents OOM on long podcasts + # For higher VRAM GPUs, can increase to 16 or 24 + t4_batch_size = 8 + model = whisperx.load_model( model_name, device, compute_type=compute_type, + language=detected_language if not language_forced else language, asr_options={"hotwords": None} ) result = model.transcribe( audio, - batch_size=16, - chunk_size=10 + batch_size=t4_batch_size, + chunk_size=15 # Slightly larger chunks for efficiency ) detected_language = result["language"] diff --git a/viralcutter-Kaggle-SmoothZoom.ipynb b/viralcutter-Kaggle-SmoothZoom.ipynb new file mode 100644 index 0000000..1a9a0dd --- /dev/null +++ b/viralcutter-Kaggle-SmoothZoom.ipynb @@ -0,0 +1,132 @@ +{ + "metadata": { + "kernelspec": { + "language": "python", + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.12", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kaggle": { + "accelerator": "nvidiaTeslaT4", + "dataSources": [ + { + "sourceId": 14644893, + "sourceType": "datasetVersion", + "datasetId": 9355102 + }, + { + "sourceId": 14669723, + "sourceType": "datasetVersion", + "datasetId": 9371811 + }, + { + "sourceId": 14678475, + "sourceType": "datasetVersion", + "datasetId": 9377534 + }, + { + "sourceId": 14683135, + "sourceType": "datasetVersion", + "datasetId": 9380125 + }, + { + "sourceId": 14683138, + "sourceType": "datasetVersion", + "datasetId": 9380127 + } + ], + "dockerImageVersionId": 31260, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook", + "isGpuEnabled": true + } + }, + "nbformat_minor": 4, + "nbformat": 4, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# 🎬 ViralCutter - Cyclic Smooth Zoom Edition (Kaggle)\n", + "\n", + "Versão especial com **YOLO Tracking + Cyclic Smooth Zoom** automático!\n", + "\n", + "### ✨ Novidades v0.9\n", + "- 🔄 **Zoom Cíclico**: zoom in (3s) → hold (2s) → **SNAP BACK** (instantâneo) → repete\n", + "- 🎥 **Tracking Ultra Suave**: alpha 0.02 (câmera segue o rosto bem devagar)\n", + "- ⚡ **T4 Otimizado**: WhisperX com int8 quantization (50% menos VRAM)\n", + "\n", + "### 🚀 Recursos\n", + "- ✅ Download de vídeos do YouTube\n", + "- ✅ Cortes automáticos com IA (Gemini)\n", + "- ✅ Legendas dinâmicas\n", + "- ✅ Upload automático para Google Drive\n", + "- ✅ Transcrição otimizada para GPUs T4\n", + "\n", + "---\n", + "\n", + "## 📦 Datasets Necessários\n", + "\n", + "| Dataset | Descrição |\n", + "|---------|----------|\n", + "| `client-secret-json` | OAuth do Google Cloud |\n", + "| `cookie` | Cookies YouTube (opcional) |\n", + "| `credenciais-google` | API Key Gemini |\n", + "| `google-drive-credentials` | Credenciais salvas (gerado após 1ª autenticação) |\n", + "\n", + "---" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": "#@title 🚀 VIRALCUTTER CYCLIC SMOOTH ZOOM (Kaggle + T4 Otimizado)\n#@markdown ### ⚙️ Zoom Cíclico + T4 Optimization\n#@markdown **Zoom:**\n#@markdown - Zoom In (3s) → Hold (2s) → SNAP BACK → Hold (2s) → Repete\n#@markdown\n#@markdown **WhisperX T4 Otimizado:**\n#@markdown - Modelo: `large-v2` (quantizado)\n#@markdown - Compute: `int8_float16` (50% menos VRAM)\n#@markdown - Batch: 8 (evita OOM)\n#@markdown - Idioma: Português (padrão)\n\nimport os\nimport time\nimport threading\nimport json\nimport shutil\nimport subprocess\nimport sys\nfrom IPython.display import clear_output\n\n# ==============================================================================\n# 1. SETUP DE BIBLIOTECAS (FIX AUTH)\n# ==============================================================================\nprint(\"1️⃣ Ajustando bibliotecas de autenticação...\")\ntry:\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth-oauthlib==0.4.6\", \"google-api-python-client\", \"--force-reinstall\", \"--no-deps\", \"-q\"],\n check=True\n )\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth>=1.0.0\", \"requests-oauthlib>=0.7.0\", \"-q\"], \n check=True\n )\nexcept Exception as e:\n print(f\"⚠️ Aviso na instalação de libs: {e}\")\n\nfrom google_auth_oauthlib.flow import InstalledAppFlow\nfrom googleapiclient.discovery import build\nfrom googleapiclient.http import MediaFileUpload\n\n# ==============================================================================\n# 2. INSTALAÇÃO DO SISTEMA + YOLO CYCLIC SMOOTH ZOOM + T4 OPTIMIZATION\n# ==============================================================================\nVENV_PYTHON = '/kaggle/working/ViralCutter/.venv/bin/python'\nBRANCH = \"smooth-zoom\" # Branch com Cyclic Smooth Zoom + T4 Optimization\n\nif not os.path.exists(VENV_PYTHON):\n print(f\"\\n📦 Instalando ViralCutter + Cyclic Smooth Zoom + T4 Opt (branch: {BRANCH})...\")\n \n if os.path.exists(\"/kaggle/working/ViralCutter\"):\n shutil.rmtree(\"/kaggle/working/ViralCutter\")\n \n # Clone da branch smooth-zoom\n subprocess.run(f\"git clone -b {BRANCH} https://github.com/masterface77/ViralCutter.git /kaggle/working/ViralCutter\", shell=True, check=True)\n os.chdir(\"/kaggle/working/ViralCutter\")\n \n print(\" - Instalando drivers...\")\n subprocess.run(\"pip install uv -q\", shell=True)\n subprocess.run(\"apt-get update -y -qq && apt-get install -y libcudnn8 ffmpeg xvfb -qq\", shell=True)\n \n print(\" - Criando ambiente virtual...\")\n subprocess.run([\"uv\", \"venv\", \".venv\"], check=True)\n \n print(\" - Baixando pacotes de IA + Ultralytics (T4 Otimizado)...\")\n cmds = [\n \"uv pip install --python .venv faster-whisper\",\n \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n \"uv pip install --python .venv -r requirements.txt\",\n \"uv pip install --python .venv -U --pre 'yt-dlp[default]'\",\n \"uv pip install --python .venv pytubefix google-generativeai pandas onnxruntime-gpu\",\n \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\",\n \"uv pip install --python .venv torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121\",\n \"uv pip install --python .venv 'numpy<2.0' setuptools==69.5.1\",\n \"uv pip install --python .venv insightface onnxruntime-gpu\",\n # YOLO Cyclic Smooth Zoom\n \"uv pip install --python .venv ultralytics\",\n \"uv pip uninstall --python .venv mediapipe protobuf flatbuffers\",\n \"uv pip install --python .venv 'mediapipe>=0.10.0' 'protobuf>=3.20,<5.0' 'flatbuffers>=2.0'\"\n ]\n \n for cmd in cmds:\n subprocess.run(cmd, shell=True, check=True)\n \n print(\"✅ Instalação Concluída!\")\n print(\"🔄 Cyclic Smooth Zoom + T4 Optimization ativados!\")\n print(\"⚡ WhisperX: int8_float16, batch_size=8, large-v2\")\nelse:\n os.chdir(\"/kaggle/working/ViralCutter\")\n print(\"\\n✅ Sistema já instalado.\")\n # Atualizar para última versão\n subprocess.run(\"git pull origin smooth-zoom\", shell=True)\n print(\"🔄 Atualizado para última versão!\")\n\n# ==============================================================================\n# 3. PATCH NO DOWNLOADER\n# ==============================================================================\nprint(\"\\n🔧 Aplicando patch no sistema de download...\")\ndownload_script_content = r'''\nimport yt_dlp\nimport os\nimport glob\nimport shutil\n\ndef download(url, download_subs=False, quality=\"best\", output_folder=\"VIRALS\"):\n print(f\"🚀 Iniciando download: {url}\")\n title_temp = \"video_temp\"\n try:\n with yt_dlp.YoutubeDL({'quiet': True}) as ydl:\n info = ydl.extract_info(url, download=False)\n if info:\n t = info.get('title', 'video_temp')\n title_temp = \"\".join([c for c in t if c.isalnum() or c in \" .-_\"]).strip()\n except: pass\n\n final_path = os.path.join(output_folder, title_temp)\n os.makedirs(final_path, exist_ok=True)\n output_template = f\"{final_path}/input.%(ext)s\"\n\n ydl_opts = {\n 'format': \"bestvideo+bestaudio/best\",\n 'outtmpl': output_template,\n 'noplaylist': True,\n 'writesubtitles': False,\n 'writeautomaticsub': False,\n 'extractor_args': {'youtube': {'player_client': ['android', 'web']}}, \n 'quiet': False,\n 'no_warnings': True,\n 'merge_output_format': 'mp4'\n }\n\n try:\n with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n ydl.download([url])\n \n mp4_files = glob.glob(os.path.join(final_path, \"*.mp4\"))\n if mp4_files:\n found_file = mp4_files[0]\n print(f\"✅ Arquivo baixado: {found_file}\")\n expected_input = os.path.join(final_path, \"input.mp4\")\n if found_file != expected_input:\n shutil.move(found_file, expected_input)\n return expected_input\n return found_file\n raise FileNotFoundError(\"Arquivo MP4 não gerado\")\n except Exception as e:\n print(f\"❌ Falha crítica no Download: {e}\")\n raise e\n'''\nwith open('/kaggle/working/ViralCutter/scripts/download_video.py', 'w') as f:\n f.write(download_script_content)\n\n# ==============================================================================\n# 4. AUTENTICAÇÃO OAUTH\n# ==============================================================================\nCLIENT_SECRET_FILE = None\nSCOPES = ['https://www.googleapis.com/auth/drive.file']\nAPI_SERVICE_NAME = 'drive'\nAPI_VERSION = 'v3'\n\nprint(\"\\n🔑 Procurando 'client_secret.json'...\")\nfor root, dirs, files in os.walk('/kaggle/input'):\n for file in files:\n if 'client_secret' in file and file.endswith('.json'):\n CLIENT_SECRET_FILE = os.path.join(root, file)\n print(f\"✅ Credencial encontrada: {CLIENT_SECRET_FILE}\")\n break\n if CLIENT_SECRET_FILE: break\n\nservice = None\nfolder_id = None\n\nif CLIENT_SECRET_FILE:\n print(\"\\n🌐 INICIANDO LOGIN GOOGLE...\")\n print(\" ⚠️ Siga os passos abaixo com atenção:\")\n print(\" 1. Clique no link que aparecerá abaixo.\")\n print(\" 2. Faça login e autorize o acesso.\")\n print(\" 3. Copie o código gerado pelo Google.\")\n print(\" 4. COLE O CÓDIGO na caixa de entrada aqui no Kaggle e aperte Enter.\\n\")\n \n try:\n flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, SCOPES)\n \n if hasattr(flow, 'run_console'):\n creds = flow.run_console()\n else:\n print(\"❌ AVISO CRÍTICO: O downgrade da biblioteca falhou.\")\n print(\" Tentando método alternativo via OOB...\")\n flow.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'\n auth_url, _ = flow.authorization_url(prompt='consent')\n print(f\" Acesse este URL: {auth_url}\")\n code = input(\" Cole o código de autorização aqui: \")\n flow.fetch_token(code=code)\n creds = flow.credentials\n\n service = build(API_SERVICE_NAME, API_VERSION, credentials=creds)\n print(\"\\n✅ Autenticação realizada com SUCESSO!\")\n \n try:\n results = service.files().list(q=\"name='ViralCutter_SmoothZoom' and mimeType='application/vnd.google-apps.folder' and trashed=false\", fields=\"files(id)\").execute()\n items = results.get('files', [])\n if items:\n folder_id = items[0]['id']\n print(f\"✅ Pasta 'ViralCutter_SmoothZoom' encontrada (ID: {folder_id})\")\n else:\n file_metadata = {'name': 'ViralCutter_SmoothZoom', 'mimeType': 'application/vnd.google-apps.folder'}\n folder = service.files().create(body=file_metadata, fields='id').execute()\n folder_id = folder.get('id')\n print(f\"✅ Pasta criada (ID: {folder_id})\")\n except Exception as e:\n print(f\"⚠️ Erro ao criar pasta: {e}\")\n except Exception as e:\n print(f\"❌ Erro no Login: {e}\")\n print(\" Nota: Se aparecer 'invalid_grant' ou 'redirect_uri_mismatch', suas credenciais podem não ser do tipo 'Desktop App'.\")\nelse:\n print(\"❌ 'client_secret.json' não encontrado. O upload será desativado.\")\n\n# ==============================================================================\n# 5. MONITOR DE UPLOAD OAUTH - APENAS ARQUIVOS COM LEGENDA (_subtitled)\n# ==============================================================================\ndef monitor_oauth():\n if not service or not folder_id: \n return\n \n print(\"\\n👀 Monitor OAuth Ativo: Uploads usarão SEU espaço.\")\n print(\"📌 APENAS arquivos COM LEGENDA (_subtitled) serão enviados!\")\n uploaded = set()\n \n while True:\n try:\n watch_path = '/kaggle/working/ViralCutter/VIRALS'\n if os.path.exists(watch_path):\n for r, d, f in os.walk(watch_path):\n for file in f:\n if not file.endswith('.mp4'):\n continue\n \n # Ignora arquivos temporários e de processamento\n ignore_patterns = [\n 'input.mp4', '_original_scale', '_temp', \n 'temp_', '.part', 'processing'\n ]\n \n if any(pattern in file.lower() for pattern in ignore_patterns):\n continue\n \n # ✅ APENAS faz upload de arquivos COM LEGENDA (_subtitled)\n if '_subtitled' not in file.lower():\n continue\n \n path = os.path.join(r, file)\n \n if path not in uploaded:\n try:\n if os.path.getsize(path) > 1e6:\n s1 = os.path.getsize(path)\n time.sleep(5)\n if not os.path.exists(path):\n continue\n if os.path.getsize(path) != s1:\n continue\n \n print(f\"\\n💎 Corte COM LEGENDA Detectado: {file}\")\n print(f\" 📁 Tamanho: {os.path.getsize(path) / 1e6:.2f} MB\")\n print(f\" ☁️ Enviando para Google Drive (Conta Pessoal)...\")\n \n try:\n meta = {'name': file, 'parents': [folder_id]}\n media = MediaFileUpload(path, resumable=True)\n service.files().create(body=meta, media_body=media).execute()\n uploaded.add(path)\n print(\" ✅ Upload Completo!\")\n except Exception as e_up:\n print(f\" ❌ Erro Upload: {e_up}\")\n time.sleep(5)\n except FileNotFoundError:\n continue\n \n time.sleep(10)\n except Exception as e:\n print(f\"⚠️ Erro no monitor: {e}\")\n time.sleep(10)\n\nthreading.Thread(target=monitor_oauth, daemon=True).start()\n\n# ==============================================================================\n# 6. INICIAR INTERFACE\n# ==============================================================================\nprint(\"\\n🚀 INICIANDO VIRALCUTTER...\")\nprint(\"⚡ T4 Otimizado: int8_float16, batch_size=8, large-v2\")\nprint(\"⚠️ CLIQUE NO LINK PÚBLICO (gradio.live) ABAIXO:\")\nprint(\"=\"*60)\n\nsubprocess.Popen(\n ['Xvfb', ':1', '-screen', '0', '2560x1440x8'],\n stdout=subprocess.DEVNULL,\n stderr=subprocess.DEVNULL\n)\ntime.sleep(2)\n\nos.environ['DISPLAY'] = ':1.0'\nos.environ['MPLBACKEND'] = 'Agg'\nos.environ['CUDA_VISIBLE_DEVICES'] = '0,1'\n\n!/kaggle/working/ViralCutter/.venv/bin/python webui/app.py --colab --face-model yolo", + "metadata": { + "_uuid": "cyclic-smooth-zoom-kaggle-t4", + "_cell_guid": "cyclic-smooth-zoom-kaggle-cell-t4", + "trusted": true, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "## 📝 Créditos\n", + "\n", + "**ViralCutter** por [Rafa.Godoy](https://github.com/rafaelGodoyEbert)\n", + "\n", + "**Cyclic Smooth Zoom + T4 Optimization:**\n", + "- [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) - Face tracking\n", + "- WhisperX com int8 quantization - Transcrição otimizada\n", + "- EMA alpha=0.02 - Camera suave\n", + "\n", + "### ⚡ Configuração T4\n", + "```\n", + "WhisperX: large-v2 | int8_float16 | batch=8 | lang=pt\n", + "Zoom: 1.0x ──(3s)──► 1.4x ──(2s)──► SNAP ──(2s)──► repete\n", + "```\n", + "\n", + "---\n", + "`v0.9 Alpha + Cyclic Smooth Zoom + T4 Optimization`" + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/viralcutter-Kaggle.ipynb b/viralcutter-Kaggle.ipynb new file mode 100644 index 0000000..c470b6c --- /dev/null +++ b/viralcutter-Kaggle.ipynb @@ -0,0 +1,206 @@ +{ + "metadata": { + "kernelspec": { + "language": "python", + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.12", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kaggle": { + "accelerator": "nvidiaTeslaT4", + "dataSources": [ + { + "sourceId": 14644893, + "sourceType": "datasetVersion", + "datasetId": 9355102 + }, + { + "sourceId": 14669723, + "sourceType": "datasetVersion", + "datasetId": 9371811 + }, + { + "sourceId": 14678475, + "sourceType": "datasetVersion", + "datasetId": 9377534 + }, + { + "sourceId": 14683135, + "sourceType": "datasetVersion", + "datasetId": 9380125 + }, + { + "sourceId": 14683138, + "sourceType": "datasetVersion", + "datasetId": 9380127 + } + ], + "dockerImageVersionId": 31260, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook", + "isGpuEnabled": true + } + }, + "nbformat_minor": 4, + "nbformat": 4, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# 📦 Como Configurar os Datasets no Kaggle\n", + "\n", + "Este notebook requer alguns datasets com credenciais para funcionar corretamente. Siga os passos abaixo:\n", + "\n", + "## 1️⃣ client_secret.json (Obrigatório)\n", + "Credenciais OAuth do Google Cloud para autenticação e upload no Drive.\n", + "\n", + "**Como obter:**\n", + "1. Acesse [Google Cloud Console](https://console.cloud.google.com/)\n", + "2. Crie um novo projeto ou selecione um existente\n", + "3. Vá em **APIs & Services** → **Credentials**\n", + "4. Clique em **Create Credentials** → **OAuth 2.0 Client ID**\n", + "5. Escolha **Desktop App** como tipo de aplicação\n", + "6. Baixe o arquivo JSON\n", + "7. No Kaggle: **Add Data** → **Upload** → Faça upload do `client_secret.json`\n", + "8. Nomeie o dataset como `client-secret-json`\n", + "\n", + "## 2️⃣ cookie (Opcional)\n", + "Cookies do YouTube/Kaggle para download de vídeos privados/restritos.\n", + "\n", + "**Como obter:**\n", + "1. Instale a extensão **Get cookies.txt LOCAL** no Chrome/Edge\n", + " - [Chrome Web Store](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc)\n", + "2. Acesse [www.kaggle.com](https://www.kaggle.com) (logado)\n", + "3. Clique na extensão e em **Export** ou **Export As**\n", + "4. Salve o arquivo como `www.youtube.com_cookies.txt`\n", + "5. No Kaggle: **Add Data** → **Upload** → Faça upload do arquivo\n", + "6. Nomeie o dataset como `cookie`\n", + "\n", + "## 3️⃣ credenciais-google (Obrigatório)\n", + "API Key do Google Gemini para análise de vídeos com IA.\n", + "\n", + "**Como obter:**\n", + "1. Acesse [Google AI Studio](https://makersuite.google.com/app/apikey)\n", + "2. Clique em **Create API Key**\n", + "3. Copie a chave gerada\n", + "4. Crie um arquivo de texto `gemini_api.txt` e cole a chave\n", + "5. No Kaggle: **Add Data** → **Upload** → Faça upload do arquivo\n", + "6. Nomeie o dataset como `credenciais-google`\n", + "\n", + "## 4️⃣ google-drive-credentials (Gerado Automaticamente)\n", + "Este arquivo é gerado durante a autenticação OAuth na primeira execução.\n", + "\n", + "**Como usar:**\n", + "1. Na primeira execução, siga as instruções de autenticação\n", + "2. **Após a autenticação bem-sucedida**, o notebook gerará o arquivo `kaggle-viral-cutte-*.json`\n", + "3. Para reutilizar nas próximas execuções:\n", + " - Baixe o arquivo gerado em `/kaggle/working/`\n", + " - Crie um novo dataset no Kaggle com este arquivo\n", + " - Nomeie como `google-drive-credentials`\n", + "\n", + "---\n", + "\n", + "## ✅ Checklist Final\n", + "Antes de executar, verifique se você adicionou aos datasets:\n", + "- ✅ `client_secret.json` → dataset `client-secret-json`\n", + "- ⚠️ `www.youtube.com_cookies.txt` → dataset `cookie` (opcional)\n", + "- ✅ `gemini_api.txt` → dataset `credenciais-google`\n", + "- ℹ️ `kaggle-viral-cutte-*.json` → dataset `google-drive-credentials` (opcional, para reuso)\n", + "\n", + "## 🎬 Pronto para Começar!\n", + "Após configurar os datasets, execute a célula abaixo para iniciar o ViralCutter.\n", + "\n", + "---" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": "#@title 🚀 VIRALCUTTER: Instalação + Upload OAuth (Versão Final v3 + Faster-Whisper)\nimport os\nimport time\nimport threading\nimport json\nimport shutil\nimport subprocess\nimport sys\nfrom IPython.display import clear_output\n\n# ==============================================================================\n# 1. SETUP DE BIBLIOTECAS (FIX AUTH - DOWNGRADE FORCE)\n# ==============================================================================\nprint(\"1️⃣ Ajustando bibliotecas de autenticação...\")\ntry:\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth-oauthlib==0.4.6\", \"google-api-python-client\", \"--force-reinstall\", \"--no-deps\", \"-q\"],\n check=True\n )\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth>=1.0.0\", \"requests-oauthlib>=0.7.0\", \"-q\"], \n check=True\n )\nexcept Exception as e:\n print(f\"⚠️ Aviso na instalação de libs: {e}\")\n\nfrom google_auth_oauthlib.flow import InstalledAppFlow\nfrom googleapiclient.discovery import build\nfrom googleapiclient.http import MediaFileUpload\n\n# ==============================================================================\n# 2. INSTALAÇÃO DO SISTEMA (AUTO-REPAIR)\n# ==============================================================================\nVENV_PYTHON = '/kaggle/working/ViralCutter/.venv/bin/python'\n\nif not os.path.exists(VENV_PYTHON):\n print(\"\\n📦 Sistema não detectado. Instalando ViralCutter (3-5 min)...\")\n \n if os.path.exists(\"/kaggle/working/ViralCutter\"):\n shutil.rmtree(\"/kaggle/working/ViralCutter\")\n \n subprocess.run(\"git clone https://github.com/RafaelGodoyEbert/ViralCutter.git /kaggle/working/ViralCutter\", shell=True, check=True)\n os.chdir(\"/kaggle/working/ViralCutter\")\n \n print(\" - Instalando drivers...\")\n subprocess.run(\"pip install uv -q\", shell=True)\n subprocess.run(\"apt-get update -y -qq && apt-get install -y libcudnn8 ffmpeg xvfb -qq\", shell=True)\n \n print(\" - Criando ambiente virtual...\")\n subprocess.run([\"uv\", \"venv\", \".venv\"], check=True)\n \n print(\" - Baixando pacotes de IA...\")\n cmds = [\n \"uv pip install --python .venv faster-whisper\", # <--- ÚNICA MUDANÇA: Adiciona motor otimizado\n \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n \"uv pip install --python .venv -r requirements.txt\",\n \"uv pip install --python .venv -U --pre 'yt-dlp[default]'\",\n \"uv pip install --python .venv pytubefix google-generativeai pandas onnxruntime-gpu\",\n \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\",\n \"uv pip install --python .venv torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121\",\n \"uv pip install --python .venv 'numpy<2.0' setuptools==69.5.1\",\n \"uv pip install --python .venv insightface onnxruntime-gpu\",\n \"uv pip uninstall --python .venv mediapipe protobuf flatbuffers\",\n \"uv pip install --python .venv 'mediapipe>=0.10.0' 'protobuf>=3.20,<5.0' 'flatbuffers>=2.0'\"\n ]\n \n for cmd in cmds:\n subprocess.run(cmd, shell=True, check=True)\n \n print(\"✅ Instalação Concluída!\")\n print(\" 💡 Faster-Whisper instalado: Agora você pode usar Large sem estourar memória!\")\nelse:\n print(\"\\n✅ Sistema já instalado. Pulando etapa.\")\n\n# ==============================================================================\n# 3. PATCH NO DOWNLOADER\n# ==============================================================================\nprint(\"\\n🔧 Aplicando patch no sistema de download...\")\ndownload_script_content = r'''\nimport yt_dlp\nimport os\nimport glob\nimport shutil\n\ndef download(url, download_subs=False, quality=\"best\", output_folder=\"VIRALS\"):\n print(f\"🚀 Iniciando download: {url}\")\n title_temp = \"video_temp\"\n try:\n with yt_dlp.YoutubeDL({'quiet': True}) as ydl:\n info = ydl.extract_info(url, download=False)\n if info:\n t = info.get('title', 'video_temp')\n title_temp = \"\".join([c for c in t if c.isalnum() or c in \" .-_\"]).strip()\n except: pass\n\n final_path = os.path.join(output_folder, title_temp)\n os.makedirs(final_path, exist_ok=True)\n output_template = f\"{final_path}/input.%(ext)s\"\n\n ydl_opts = {\n 'format': \"bestvideo+bestaudio/best\",\n 'outtmpl': output_template,\n 'noplaylist': True,\n 'writesubtitles': False,\n 'writeautomaticsub': False,\n 'extractor_args': {'youtube': {'player_client': ['android', 'web']}}, \n 'quiet': False,\n 'no_warnings': True,\n 'merge_output_format': 'mp4'\n }\n\n try:\n with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n ydl.download([url])\n \n mp4_files = glob.glob(os.path.join(final_path, \"*.mp4\"))\n if mp4_files:\n found_file = mp4_files[0]\n print(f\"✅ Arquivo baixado: {found_file}\")\n expected_input = os.path.join(final_path, \"input.mp4\")\n if found_file != expected_input:\n shutil.move(found_file, expected_input)\n return expected_input\n return found_file\n raise FileNotFoundError(\"Arquivo MP4 não gerado\")\n except Exception as e:\n print(f\"❌ Falha crítica no Download: {e}\")\n raise e\n'''\nwith open('/kaggle/working/ViralCutter/scripts/download_video.py', 'w') as f:\n f.write(download_script_content)\n\n# ==============================================================================\n# 4. AUTENTICAÇÃO OAUTH (MANTIDO 100% ORIGINAL)\n# ==============================================================================\nCLIENT_SECRET_FILE = None\nSCOPES = ['https://www.googleapis.com/auth/drive.file']\nAPI_SERVICE_NAME = 'drive'\nAPI_VERSION = 'v3'\n\nprint(\"\\n🔑 Procurando 'client_secret.json'...\")\nfor root, dirs, files in os.walk('/kaggle/input'):\n for file in files:\n if 'client_secret' in file and file.endswith('.json'):\n CLIENT_SECRET_FILE = os.path.join(root, file)\n print(f\"✅ Credencial encontrada: {CLIENT_SECRET_FILE}\")\n break\n if CLIENT_SECRET_FILE: break\n\nservice = None\nfolder_id = None\n\nif CLIENT_SECRET_FILE:\n print(\"\\n🌐 INICIANDO LOGIN GOOGLE...\")\n print(\" ⚠️ Siga os passos abaixo com atenção:\")\n print(\" 1. Clique no link que aparecerá abaixo.\")\n print(\" 2. Faça login e autorize o acesso.\")\n print(\" 3. Copie o código gerado pelo Google.\")\n print(\" 4. COLE O CÓDIGO na caixa de entrada aqui no Kaggle e aperte Enter.\\n\")\n \n try:\n flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, SCOPES)\n \n if hasattr(flow, 'run_console'):\n creds = flow.run_console()\n else:\n print(\"❌ AVISO CRÍTICO: O downgrade da biblioteca falhou.\")\n print(\" Tentando método alternativo via OOB...\")\n flow.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'\n auth_url, _ = flow.authorization_url(prompt='consent')\n print(f\" Acesse este URL: {auth_url}\")\n code = input(\" Cole o código de autorização aqui: \")\n flow.fetch_token(code=code)\n creds = flow.credentials\n\n service = build(API_SERVICE_NAME, API_VERSION, credentials=creds)\n print(\"\\n✅ Autenticação realizada com SUCESSO!\")\n \n try:\n results = service.files().list(q=\"name='ViralCutter_Videos' and mimeType='application/vnd.google-apps.folder' and trashed=false\", fields=\"files(id)\").execute()\n items = results.get('files', [])\n if items:\n folder_id = items[0]['id']\n print(f\"✅ Pasta 'ViralCutter_Videos' encontrada (ID: {folder_id})\")\n else:\n file_metadata = {'name': 'ViralCutter_Videos', 'mimeType': 'application/vnd.google-apps.folder'}\n folder = service.files().create(body=file_metadata, fields='id').execute()\n folder_id = folder.get('id')\n print(f\"✅ Pasta criada (ID: {folder_id})\")\n except Exception as e:\n print(f\"⚠️ Erro ao criar pasta: {e}\")\n except Exception as e:\n print(f\"❌ Erro no Login: {e}\")\n print(\" Nota: Se aparecer 'invalid_grant' ou 'redirect_uri_mismatch', suas credenciais podem não ser do tipo 'Desktop App'.\")\nelse:\n print(\"❌ 'client_secret.json' não encontrado. O upload será desativado.\")\n\n# ==============================================================================\n# 5. MONITOR DE UPLOAD OAUTH (MANTIDO 100% ORIGINAL)\n# ==============================================================================\ndef monitor_oauth():\n if not service or not folder_id: \n return\n \n print(\"\\n👀 Monitor OAuth Ativo: Uploads usarão SEU espaço.\")\n uploaded = set()\n \n while True:\n try:\n watch_path = '/kaggle/working/ViralCutter/VIRALS'\n if os.path.exists(watch_path):\n for r, d, f in os.walk(watch_path):\n for file in f:\n if not file.endswith('.mp4'):\n continue\n \n ignore_patterns = [\n 'input.mp4', '_original_scale', '_subtitled', '_temp', \n 'temp_', 'final_', '.part', 'processing'\n ]\n \n if any(pattern in file.lower() for pattern in ignore_patterns):\n continue\n \n path = os.path.join(r, file)\n \n if path not in uploaded and os.path.getsize(path) > 1e6:\n s1 = os.path.getsize(path)\n time.sleep(5)\n if os.path.getsize(path) != s1:\n continue\n \n print(f\"\\n💎 Novo Corte FINAL Detectado: {file}\")\n print(f\" 📁 Tamanho: {os.path.getsize(path) / 1e6:.2f} MB\")\n print(f\" ☁️ Enviando para Google Drive (Conta Pessoal)...\")\n \n try:\n meta = {'name': file, 'parents': [folder_id]}\n media = MediaFileUpload(path, resumable=True)\n service.files().create(body=meta, media_body=media).execute()\n uploaded.add(path)\n print(\" ✅ Upload Completo!\")\n except Exception as e_up:\n print(f\" ❌ Erro Upload: {e_up}\")\n time.sleep(5)\n \n time.sleep(10)\n except Exception as e:\n print(f\"⚠️ Erro no monitor: {e}\")\n time.sleep(10)\n\nthreading.Thread(target=monitor_oauth, daemon=True).start()\n\n# ==============================================================================\n# 6. INICIAR (MANTIDO 100% ORIGINAL)\n# ==============================================================================\nprint(\"\\n🚀 INICIANDO VIRALCUTTER...\")\nprint(\"⚠️ CLIQUE NO LINK PÚBLICO (gradio.live) ABAIXO:\")\nprint(\"=\"*60)\n\nsubprocess.Popen(\n ['Xvfb', ':1', '-screen', '0', '2560x1440x8'],\n stdout=subprocess.DEVNULL,\n stderr=subprocess.DEVNULL\n)\ntime.sleep(2)\n\nos.environ['DISPLAY'] = ':1.0'\nos.environ['MPLBACKEND'] = 'Agg'\nos.environ['CUDA_VISIBLE_DEVICES'] = '0,1'\n\n!/kaggle/working/ViralCutter/.venv/bin/python webui/app.py --colab", + "metadata": { + "_uuid": "14ab286a-9624-45d5-9bc5-e005704d6d30", + "_cell_guid": "1478c514-9c52-4c9b-98b3-b58856322d1c", + "trusted": true, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "execution": { + "iopub.status.busy": "2026-02-05T12:40:17.810752Z", + "iopub.execute_input": "2026-02-05T12:40:17.811470Z", + "execution_failed": "2026-02-05T13:01:51.614Z" + } + }, + "outputs": [ + { + "name": "stdout", + "text": "1️⃣ Ajustando bibliotecas de autenticação...\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.5/14.5 MB 108.7 MB/s eta 0:00:00\n\n📦 Sistema não detectado. Instalando ViralCutter (3-5 min)...\n", + "output_type": "stream" + }, + { + "name": "stderr", + "text": "Cloning into '/kaggle/working/ViralCutter'...\n", + "output_type": "stream" + }, + { + "name": "stdout", + "text": " - Instalando drivers...\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 22.8/22.8 MB 84.6 MB/s eta 0:00:00\n", + "output_type": "stream" + }, + { + "name": "stderr", + "text": "W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)\n", + "output_type": "stream" + }, + { + "name": "stdout", + "text": "Selecting previously unselected package libcudnn8.\n(Reading database ... 129073 files and directories currently installed.)\nPreparing to unpack .../libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb ...\nUnpacking libcudnn8 (8.9.7.29-1+cuda12.2) ...\nSetting up libcudnn8 (8.9.7.29-1+cuda12.2) ...\n - Criando ambiente virtual...\n", + "output_type": "stream" + }, + { + "name": "stderr", + "text": "warning: The `--system` flag has no effect, `uv venv` always ignores virtual environments when finding a Python interpreter; did you mean `--no-managed-python`?\nUsing CPython 3.12.12 interpreter at: /usr/bin/python3\nCreating virtual environment at: .venv\nActivate with: source .venv/bin/activate\n", + "output_type": "stream" + }, + { + "name": "stdout", + "text": " - Baixando pacotes de IA...\n", + "output_type": "stream" + }, + { + "name": "stderr", + "text": "Resolved 30 packages in 261ms\nPrepared 18 packages in 1.58s\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 30 packages in 213ms\n + anyio==4.12.1\n + av==16.1.0\n + certifi==2026.1.4\n + click==8.3.1\n + coloredlogs==15.0.1\n + ctranslate2==4.7.1\n + faster-whisper==1.2.1\n + filelock==3.20.3\n + flatbuffers==25.12.19\n + fsspec==2026.1.0\n + h11==0.16.0\n + hf-xet==1.2.0\n + httpcore==1.0.9\n + httpx==0.28.1\n + huggingface-hub==1.4.0\n + humanfriendly==10.0\n + idna==3.11\n + mpmath==1.3.0\n + numpy==2.4.2\n + onnxruntime==1.23.2\n + packaging==26.0\n + protobuf==6.33.5\n + pyyaml==6.0.3\n + setuptools==80.10.2\n + shellingham==1.5.4\n + sympy==1.14.0\n + tokenizers==0.22.2\n + tqdm==4.67.3\n + typer-slim==0.21.1\n + typing-extensions==4.15.0\nResolved 117 packages in 4.83s\nPrepared 72 packages in 46.44s\nUninstalled 1 package in 4ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 93 packages in 10.65s\n + aiohappyeyeballs==2.6.1\n + aiohttp==3.13.3\n + aiosignal==1.4.0\n + alembic==1.18.3\n + antlr4-python3-runtime==4.9.3\n + asteroid-filterbanks==0.4.0\n + attrs==25.4.0\n + cffi==2.0.0\n + charset-normalizer==3.4.4\n + colorlog==6.10.1\n + contourpy==1.3.3\n + cycler==0.12.1\n + docopt==0.6.2\n + einops==0.8.2\n + fonttools==4.61.1\n + frozenlist==1.8.0\n + greenlet==3.3.1\n - huggingface-hub==1.4.0\n + huggingface-hub==0.36.1\n + hyperpyyaml==1.2.3\n + jinja2==3.1.6\n + joblib==1.5.3\n + julius==0.2.7\n + kiwisolver==1.4.9\n + lightning==2.6.1\n + lightning-utilities==0.15.2\n + mako==1.3.10\n + markdown-it-py==4.0.0\n + markupsafe==3.0.3\n + matplotlib==3.10.8\n + mdurl==0.1.2\n + multidict==6.7.1\n + networkx==3.6.1\n + nltk==3.9.2\n + nvidia-cublas-cu12==12.8.4.1\n + nvidia-cuda-cupti-cu12==12.8.90\n + nvidia-cuda-nvrtc-cu12==12.8.93\n + nvidia-cuda-runtime-cu12==12.8.90\n + nvidia-cudnn-cu12==9.10.2.21\n + nvidia-cufft-cu12==11.3.3.83\n + nvidia-cufile-cu12==1.13.1.3\n + nvidia-curand-cu12==10.3.9.90\n + nvidia-cusolver-cu12==11.7.3.90\n + nvidia-cusparse-cu12==12.5.8.93\n + nvidia-cusparselt-cu12==0.7.1\n + nvidia-nccl-cu12==2.27.3\n + nvidia-nvjitlink-cu12==12.8.93\n + nvidia-nvtx-cu12==12.8.90\n + omegaconf==2.3.0\n + optuna==4.7.0\n + pandas==3.0.0\n + pillow==12.1.0\n + primepy==1.3\n + propcache==0.4.1\n + pyannote-audio==3.4.0\n + pyannote-core==5.0.0\n + pyannote-database==5.1.3\n + pyannote-metrics==3.2.1\n + pyannote-pipeline==3.0.1\n + pycparser==3.0\n + pygments==2.19.2\n + pyparsing==3.3.2\n + python-dateutil==2.9.0.post0\n + pytorch-lightning==2.6.1\n + pytorch-metric-learning==2.9.0\n + regex==2026.1.15\n + requests==2.32.5\n + rich==14.3.2\n + ruamel-yaml==0.18.17\n + ruamel-yaml-clib==0.2.15\n + safetensors==0.7.0\n + scikit-learn==1.8.0\n + scipy==1.17.0\n + semver==3.0.4\n + sentencepiece==0.2.1\n + six==1.17.0\n + sortedcontainers==2.4.0\n + soundfile==0.13.1\n + speechbrain==1.0.3\n + sqlalchemy==2.0.46\n + tabulate==0.9.0\n + tensorboardx==2.6.4\n + threadpoolctl==3.6.0\n + torch==2.8.0+cu128\n + torch-audiomentations==0.12.0\n + torch-pitch-shift==1.2.5\n + torchaudio==2.8.0+cu128\n + torchmetrics==1.8.2\n + transformers==4.57.6\n + triton==3.4.0\n + typer==0.21.1\n + urllib3==2.6.3\n + whisperx==3.7.6 (from git+https://github.com/m-bain/whisperx.git@6ec4a020489d904c4f2cd1ed097674232d2692d4)\n + yarl==1.22.0\nResolved 243 packages in 29.96s\nPrepared 107 packages in 17.67s\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 122 packages in 6.51s\n + a2wsgi==1.10.10\n + absl-py==2.4.0\n + aiofile==3.9.0\n + aiofiles==24.1.0\n + aiohttp-socks==0.11.0\n + albucore==0.0.24\n + albumentations==2.0.8\n + annotated-doc==0.0.4\n + annotated-types==0.7.0\n + azure-ai-documentintelligence==1.0.2\n + azure-core==1.38.0\n + azure-identity==1.25.1\n + beautifulsoup4==4.14.3\n + blinker==1.9.0\n + brotli==1.2.0\n + browser-cookie3==0.20.1\n + caio==0.9.25\n + cairocffi==1.7.1\n + cairosvg==2.8.2\n + cloudscraper==1.2.71\n + cobble==0.1.4\n + cryptography==46.0.4\n + cssselect2==0.8.0\n + curl-cffi==0.14.0\n + cython==3.2.4\n + ddgs==9.10.0\n + deep-translator==1.11.4\n + defusedxml==0.7.1\n + deprecated==1.3.1\n + diskcache==5.6.3\n + distro==1.9.0\n + easydict==1.13\n + et-xmlfile==2.0.0\n + fake-useragent==2.2.0\n + fastapi==0.128.1\n + ffmpeg-python==0.2.0\n + ffmpy==1.0.0\n + flask==3.1.2\n + future==1.0.0\n + g4f==7.0.0\n + google-auth==2.48.0\n + google-genai==1.62.0\n + gradio==6.5.1\n + gradio-client==2.0.3\n + groovy==0.1.2\n + h2==4.3.0\n + hpack==4.1.0\n + hyperframe==6.1.0\n + imageio==2.37.2\n + insightface==0.7.3\n + isodate==0.7.2\n + itsdangerous==2.2.0\n + jeepney==0.9.0\n + lazy-loader==0.4\n + llama-cpp-python==0.3.16\n + lxml==6.0.2\n + lz4==4.4.5\n + magika==0.6.3\n + mammoth==1.11.0\n + markdownify==1.2.2\n + markitdown==0.1.4\n + mediapipe==0.10.32\n + ml-dtypes==0.5.4\n + msal==1.34.0\n + msal-extensions==1.3.1\n + mss==10.1.0\n + nest-asyncio2==1.7.1\n + nodriver==0.48.1\n + olefile==0.47\n + onnx==1.20.1\n + onnxruntime-gpu==1.23.2\n + opencv-contrib-python==4.13.0.92\n + opencv-python==4.13.0.92\n + opencv-python-headless==4.13.0.92\n + openpyxl==3.1.5\n + orjson==3.11.7\n + pdfminer-six==20260107\n + platformdirs==4.5.1\n + prettytable==3.17.0\n + primp==0.15.0\n + psutil==7.2.2\n + pyasn1==0.6.2\n + pyasn1-modules==0.4.2\n + pycryptodome==3.23.0\n + pycryptodomex==3.23.0\n + pydantic==2.12.5\n + pydantic-core==2.41.5\n + pydub==0.25.1\n + pyjwt==2.11.0\n + python-dotenv==1.2.1\n + python-multipart==0.0.22\n + python-pptx==1.0.2\n + python-socks==2.8.0\n + pytz==2025.2\n + requests-toolbelt==1.0.0\n + rsa==4.9.1\n + safehttpx==0.1.7\n + scikit-image==0.26.0\n + semantic-version==2.10.0\n + simsimd==6.5.12\n + sniffio==1.3.1\n + socksio==1.0.0\n + sounddevice==0.5.5\n + soupsieve==2.8.3\n + speechrecognition==3.14.5\n + starlette==0.50.0\n + stringzilla==4.6.0\n + tenacity==9.1.3\n + tifffile==2026.1.28\n + tinycss2==1.5.1\n + tomlkit==0.13.3\n + typing-inspection==0.4.2\n + uvicorn==0.40.0\n + wcwidth==0.5.3\n + webencodings==0.5.1\n + websockets==15.0.1\n + werkzeug==3.1.5\n + wrapt==2.1.1\n + xlrd==2.0.2\n + xlsxwriter==3.2.9\n + youtube-transcript-api==1.0.3\n + yt-dlp==2026.2.4\nResolved 11 packages in 106ms\nPrepared 4 packages in 211ms\nUninstalled 2 packages in 26ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 4 packages in 62ms\n + mutagen==1.47.0\n - websockets==15.0.1\n + websockets==16.0\n - yt-dlp==2026.2.4\n + yt-dlp==2026.2.4.233607.dev0\n + yt-dlp-ejs==0.4.0\nResolved 52 packages in 249ms\nPrepared 14 packages in 1.56s\nUninstalled 1 package in 2ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 14 packages in 430ms\n + google-ai-generativelanguage==0.6.15\n + google-api-core==2.29.0\n + google-api-python-client==2.189.0\n + google-auth-httplib2==0.3.0\n + google-generativeai==0.8.6\n + googleapis-common-protos==1.72.0\n + grpcio==1.76.0\n + grpcio-status==1.71.2\n + httplib2==0.31.2\n + nodejs-wheel-binaries==24.13.0\n + proto-plus==1.27.1\n - protobuf==6.33.5\n + protobuf==5.29.6\n + pytubefix==10.3.6\n + uritemplate==4.2.0\nResolved 42 packages in 75ms\nPrepared 3 packages in 435ms\nUninstalled 2 packages in 92ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 3 packages in 127ms\n + accelerate==1.12.0\n - tokenizers==0.22.2\n + tokenizers==0.20.3\n - transformers==4.57.6\n + transformers==4.46.3\nResolved 25 packages in 1.02s\nPrepared 14 packages in 24.71s\nUninstalled 13 packages in 609ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 14 packages in 4.13s\n - nvidia-cublas-cu12==12.8.4.1\n + nvidia-cublas-cu12==12.1.3.1\n - nvidia-cuda-cupti-cu12==12.8.90\n + nvidia-cuda-cupti-cu12==12.1.105\n - nvidia-cuda-nvrtc-cu12==12.8.93\n + nvidia-cuda-nvrtc-cu12==12.1.105\n - nvidia-cuda-runtime-cu12==12.8.90\n + nvidia-cuda-runtime-cu12==12.1.105\n - nvidia-cudnn-cu12==9.10.2.21\n + nvidia-cudnn-cu12==8.9.2.26\n - nvidia-cufft-cu12==11.3.3.83\n + nvidia-cufft-cu12==11.0.2.54\n - nvidia-curand-cu12==10.3.9.90\n + nvidia-curand-cu12==10.3.2.106\n - nvidia-cusolver-cu12==11.7.3.90\n + nvidia-cusolver-cu12==11.4.5.107\n - nvidia-cusparse-cu12==12.5.8.93\n + nvidia-cusparse-cu12==12.1.0.106\n - nvidia-nccl-cu12==2.27.3\n + nvidia-nccl-cu12==2.20.5\n - nvidia-nvtx-cu12==12.8.90\n + nvidia-nvtx-cu12==12.1.105\n - torch==2.8.0+cu128\n + torch==2.3.1+cu121\n - torchaudio==2.8.0+cu128\n + torchaudio==2.3.1+cu121\n + torchvision==0.18.1+cu121\nResolved 2 packages in 637ms\nPrepared 2 packages in 16.18s\nUninstalled 2 packages in 43ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 2 packages in 92ms\n - numpy==2.4.2\n + numpy==1.26.4\n - setuptools==80.10.2\n + setuptools==69.5.1\nResolved 51 packages in 262ms\nPrepared 1 package in 616ms\nUninstalled 1 package in 31ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 1 package in 95ms\n - opencv-python-headless==4.13.0.92\n + opencv-python-headless==4.11.0.86\nUninstalled 3 packages in 15ms\n - flatbuffers==25.12.19\n - mediapipe==0.10.32\n - protobuf==5.29.6\nResolved 19 packages in 83ms\nPrepared 2 packages in 917ms\nUninstalled 1 package in 36ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 4 packages in 302ms\n + flatbuffers==25.12.19\n + mediapipe==0.10.32\n - opencv-contrib-python==4.13.0.92\n + opencv-contrib-python==4.11.0.86\n + protobuf==4.25.8\n", + "output_type": "stream" + }, + { + "name": "stdout", + "text": "✅ Instalação Concluída!\n 💡 Faster-Whisper instalado: Agora você pode usar Large sem estourar memória!\n\n🔧 Aplicando patch no sistema de download...\n\n🔑 Procurando 'client_secret.json'...\n✅ Credencial encontrada: /kaggle/input/client-secret-json/client_secret.json\n\n🌐 INICIANDO LOGIN GOOGLE...\n ⚠️ Siga os passos abaixo com atenção:\n 1. Clique no link que aparecerá abaixo.\n 2. Faça login e autorize o acesso.\n 3. Copie o código gerado pelo Google.\n 4. COLE O CÓDIGO na caixa de entrada aqui no Kaggle e aperte Enter.\n\nPlease visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=6470460908-46mrv6m55s52s5cuj7nioqm8t57s1n4l.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.file&state=Oqb7A4mqoRs7NyrOH0jH12QqsNFujR&prompt=consent&access_type=offline\n", + "output_type": "stream" + }, + { + "output_type": "stream", + "name": "stdin", + "text": "Enter the authorization code: 4/1ASc3gC1FY0jcZpMngnkjlzX6xvInHKlu_BgLYqgYMP0nb_nMOIdu6WVg2Ho\n" + }, + { + "name": "stdout", + "text": "\n✅ Autenticação realizada com SUCESSO!\n✅ Pasta 'ViralCutter_Videos' encontrada (ID: 1dkq555mhQkzDRe7DAN3v1mcZ6YqLt0cG)\n\n👀 Monitor OAuth Ativo: Uploads usarão SEU espaço.\n\n🚀 INICIANDO VIRALCUTTER...\n⚠️ CLIQUE NO LINK PÚBLICO (gradio.live) ABAIXO:\n============================================================\nRunning in Colab mode. Generating public link with Static Mounts...\nDEBUG: Registered static paths: ['/kaggle/working/ViralCutter/VIRALS', '/kaggle/working/ViralCutter', '/kaggle/working/ViralCutter', '.']\nDEBUG: Allowed paths for Gradio: ['/kaggle/working/ViralCutter/VIRALS', '/kaggle/working/ViralCutter', '/kaggle/working/ViralCutter', '.']\n* Running on local URL: http://127.0.0.1:7860\n* Running on public URL: https://e6e28ea40b0b9652dd.gradio.live\n\nThis share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\nMounted /virals to /kaggle/working/ViralCutter/VIRALS\n\n💎 Novo Corte FINAL Detectado: 000_Da_Loja_Física_ao_Mercado_Livre_O_Segredo_para_Faturar_Dormi.mp4\n 📁 Tamanho: 44.38 MB\n ☁️ Enviando para Google Drive (Conta Pessoal)...\n ✅ Upload Completo!\n⚠️ Erro no monitor: [Errno 2] No such file or directory: '/kaggle/working/ViralCutter/VIRALS/Marco Guedes revela como faturou milhões com mercado livre/final/final-output001_processed.mp4'\n\n💎 Novo Corte FINAL Detectado: 001_O_Poder_ABSURDO_do_Vídeo_nas_Vendas_e_o_Futuro_do_Mercado_Li.mp4\n 📁 Tamanho: 44.77 MB\n ☁️ Enviando para Google Drive (Conta Pessoal)...\n ✅ Upload Completo!\n\n💎 Novo Corte FINAL Detectado: 002_Sem_Capital_para_Começar_no_Mercado_Livre_A_Solução_é_Mais_S.mp4\n 📁 Tamanho: 55.86 MB\n ☁️ Enviando para Google Drive (Conta Pessoal)...\n ✅ Upload Completo!\n", + "output_type": "stream" + } + ], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/webui/app.py b/webui/app.py index 6dd882c..f39681b 100644 --- a/webui/app.py +++ b/webui/app.py @@ -179,7 +179,7 @@ def apply_experimental_preset(preset_name): def run_viral_cutter(input_source, project_name, url, video_file, segments, viral, themes, min_duration, max_duration, model, ai_backend, api_key, ai_model_name, chunk_size, workflow, face_model, face_mode, face_detect_interval, no_face_mode, - face_filter_thresh, face_two_thresh, face_conf_thresh, face_dead_zone, focus_active_speaker, active_speaker_mar, active_speaker_score_diff, include_motion, active_speaker_motion_threshold, active_speaker_motion_sensitivity, active_speaker_decay, + tracking_alpha, face_filter_thresh, face_two_thresh, face_conf_thresh, face_dead_zone, focus_active_speaker, active_speaker_mar, active_speaker_score_diff, include_motion, active_speaker_motion_threshold, active_speaker_motion_sensitivity, active_speaker_decay, use_custom_subs, font_name, font_size, font_color, highlight_color, outline_color, outline_thickness, shadow_color, shadow_size, is_bold, is_italic, is_uppercase, vertical_pos, alignment, h_size, w_block, gap, mode, under, strike, border_s, remove_punc, video_quality, use_youtube_subs, translate_target): @@ -248,6 +248,7 @@ def run_viral_cutter(input_source, project_name, url, video_file, segments, vira workflow_map = {"Full": "1", "Cut Only": "2", "Subtitles Only": "3"} cmd.extend(["--workflow", workflow_map.get(workflow, "1")]) + print(f"[DEBUG] Using face_model: {face_model}") cmd.extend(["--face-model", face_model]) cmd.extend(["--face-mode", face_mode]) if face_detect_interval: cmd.extend(["--face-detect-interval", str(face_detect_interval)]) @@ -258,6 +259,7 @@ def run_viral_cutter(input_source, project_name, url, video_file, segments, vira if face_two_thresh is not None: cmd.extend(["--face-two-threshold", str(face_two_thresh)]) if face_conf_thresh is not None: cmd.extend(["--face-confidence-threshold", str(face_conf_thresh)]) if face_dead_zone is not None: cmd.extend(["--face-dead-zone", str(face_dead_zone)]) + if tracking_alpha is not None: cmd.extend(["--tracking-alpha", str(tracking_alpha)]) @@ -479,7 +481,7 @@ def refresh_local_models(): model_input = gr.Dropdown(["tiny", "small", "medium", "large", "large-v1", "large-v2", "large-v3", "turbo", "large-v3-turbo", "distil-large-v2", "distil-medium.en", "distil-small.en", "distil-large-v3"], label=i18n("Whisper Model"), value="large-v3-turbo") with gr.Row(): workflow_input = gr.Dropdown(choices=[(i18n("Full"), "Full"), (i18n("Cut Only"), "Cut Only"), (i18n("Subtitles Only"), "Subtitles Only")], label=i18n("Workflow"), value="Full") - face_model_input = gr.Dropdown(["insightface", "mediapipe"], label=i18n("Face Model"), value="insightface") + face_model_input = gr.Dropdown(["yolo", "insightface", "mediapipe"], label=i18n("Face Model"), value="yolo", info="YOLO = Smooth Zoom") with gr.Row(): face_mode_input = gr.Dropdown(choices=[(i18n("Auto"), "auto"), ("1", "1"), ("2", "2")], label=i18n("Face Mode"), value="auto") face_detect_interval_input = gr.Textbox(label=i18n("Face Det. Interval"), value="0.17,1.0") @@ -490,6 +492,15 @@ def refresh_local_models(): input_source.change(on_source_change, inputs=input_source, outputs=[url_input, project_selector, video_upload, workflow_input]) with gr.Accordion(i18n("Advanced Face Settings"), open=False): + # Tracking Smoothness Slider (YOLO only) + gr.Markdown(f"### {i18n('Camera Tracking')}") + tracking_alpha_input = gr.Slider( + label=i18n("Tracking Smoothness"), + minimum=0.01, maximum=0.15, value=0.05, step=0.01, + info=i18n("0.02 = Ultra Suave (lento) | 0.05 = Normal | 0.10 = Rápido") + ) + + gr.Markdown(f"### {i18n('Face Detection')}") face_preset_input = gr.Dropdown(choices=[(i18n(k), k) for k in FACE_PRESETS.keys()], label=i18n("Configuration Presets"), value="Default (Balanced)", interactive=True) with gr.Row(): face_filter_thresh_input = gr.Slider(label=i18n("Ignore Small Faces (0.0 - 1.0)"), minimum=0.0, maximum=1.0, value=0.35, step=0.05, info=i18n("Relative size to ignore background.")) @@ -629,7 +640,7 @@ def refresh_local_models(): input_source, project_selector, url_input, video_upload, segments_input, viral_input, themes_input, min_dur_input, max_dur_input, model_input, ai_backend_input, api_key_input, ai_model_input, chunk_size_input, workflow_input, face_model_input, face_mode_input, face_detect_interval_input, no_face_mode_input, - face_filter_thresh_input, face_two_thresh_input, face_conf_thresh_input, face_dead_zone_input, focus_active_speaker_input, + tracking_alpha_input, face_filter_thresh_input, face_two_thresh_input, face_conf_thresh_input, face_dead_zone_input, focus_active_speaker_input, active_speaker_mar_input, active_speaker_score_diff_input, include_motion_input, active_speaker_motion_threshold_input, active_speaker_motion_sensitivity_input, active_speaker_decay_input, use_custom_subs, # Expanded Manual Inputs mapping @@ -838,6 +849,7 @@ def on_select_project(proj_name): return library.generate_project_gallery(proj_n parser = argparse.ArgumentParser() parser.add_argument("--colab", action="store_true", help="Run in Google Colab mode") + parser.add_argument("--face-model", default="insightface", help="Default face model (env var VIRALCUTTER_FACE_MODEL takes precedence for UI default)") args = parser.parse_args() if args.colab: