diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 0000000..f584e74
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,14 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(npm install)",
+      "Bash(npm install:*)",
+      "Bash(if exist node_modules rmdir /s /q node_modules)",
+      "Bash(if exist package-lock.json del package-lock.json)",
+      "Bash(powershell:*)",
+      "Read(//c/Users/qc_de/simple-whisper-transcription/**)"
+    ],
+    "deny": [],
+    "ask": []
+  }
+}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 28a60e1..81ef93a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
 node_modules
-.cursor
\ No newline at end of file
+.cursor
+*.onnx
+whisper/models
+models
\ No newline at end of file
diff --git a/WHISPER_INTEGRATION.md b/WHISPER_INTEGRATION.md
new file mode 100644
index 0000000..6d61e15
--- /dev/null
+++ b/WHISPER_INTEGRATION.md
@@ -0,0 +1,141 @@
+# Whisper AI Integration for ScrumAI
+
+This integration adds real-time speech transcription to the ScrumAI application using OpenAI's Whisper model.
+
+## Features
+
+- **Real-time Speech Transcription**: Converts speech to text in real time during meetings
+- **Live Display**: Transcripts appear instantly in the "Transcript" tab
+- **Automatic Saving**: When a meeting ends, the transcript is automatically saved as `meetingnotes_[timestamp].txt` in your home directory
+- **Keyword Extraction**: Automatically extracts keywords from the transcript for the Keywords tab
+- **Cross-platform**: Works on Windows, macOS, and Linux
+
+## Setup Instructions
+
+### 1. Install Python Dependencies
+
+Run the setup script to create a Python virtual environment and install dependencies:
+
+```bash
+setup_whisper.bat
+```
+
+This will:
+- Create a Python virtual environment (`whisper_env`)
+- Install the required packages (numpy, sounddevice, onnxruntime, PyYAML)
+- Prepare the environment for the Whisper models (the ONNX model files themselves must already be present in `whisper/models/`; see Troubleshooting)
+
+### 2. Verify Installation
+
+The application automatically detects whether the required files are present:
+- A Python executable in `whisper_env` or on the system PATH
+- The Whisper model files in `whisper/models/`
+- The configuration file at `whisper/config.yaml`
+
+### 3. Run the Application
+
+Start the ScrumAI application as usual:
+
+```bash
+npm start
+```
+
+## How to Use
+
+1. **Start Meeting**: Click the "Start Meeting" button
+   - This initializes the Whisper transcription service
+   - A microphone permission dialog may appear; grant permission
+   - Status messages are logged to the console
+
+2. **Begin Speaking**: Start talking normally
+   - Real-time transcripts appear in the "Transcript" tab
+   - Keywords are automatically extracted and shown in the "Keywords" tab
+   - A timestamp is added to each transcript entry
+
+3. **Stop Meeting**: Click the "Stop Meeting" button
+   - This stops the transcription
+   - The full transcript is automatically saved as `meetingnotes_[timestamp].txt` in your home directory
+   - A confirmation dialog shows the saved file location
+
+## File Structure
+
+```
+scrumAI/
+├── whisper/
+│   ├── transcriber_for_nodejs.py   # Main transcription script
+│   ├── standalone_model.py         # Whisper model wrapper
+│   ├── standalone_whisper.py       # Whisper implementation
+│   ├── config.yaml                 # Configuration
+│   ├── mel_filters.npz             # Mel filter coefficients
+│   ├── requirements_minimal.txt    # Python dependencies
+│   └── models/
+│       ├── WhisperEncoder.onnx     # Encoder model
+│       └── WhisperDecoder.onnx     # Decoder model
+├── src/
+│   ├── services/
+│   │   └── whisperService.js       # Node.js Whisper service wrapper
+│   └── electron/
+│       ├── main.js                 # Updated with Whisper integration
+│       └── preload.js              # Updated with IPC methods
+├── whisper_env/                    # Python virtual environment
+└── setup_whisper.bat               # Setup script
+```
+
+## Configuration
+
+The Whisper service can be configured by editing `whisper/config.yaml`:
+
+```yaml
+# Audio settings
+sample_rate: 16000        # Audio sample rate in Hz
+chunk_duration: 4         # Duration of each audio chunk in seconds
+channels: 1               # Number of audio channels (1 for mono)
+
+# Processing settings
+max_workers: 4            # Number of parallel transcription workers
+silence_threshold: 0.001  # Threshold for silence detection
+queue_timeout: 1.0        # Timeout for audio queue operations
+
+# Model paths
+encoder_path: "whisper/models/WhisperEncoder.onnx"
+decoder_path: "whisper/models/WhisperDecoder.onnx"
+```
+
+## Troubleshooting
+
+### Common Issues
+
+1. **"Python not found"**
+   - Ensure Python 3.8+ is installed
+   - Run `setup_whisper.bat` to create the virtual environment
+
+2. **"Model files not found"**
+   - Ensure the Whisper ONNX models are in `whisper/models/`
+   - Check that `WhisperEncoder.onnx` and `WhisperDecoder.onnx` exist
+
+3. **"Microphone access denied"**
+   - Grant microphone permission to the application
+   - Check your operating system's privacy settings
+
+4. **No transcript appearing**
+   - Check the console for error messages
+   - Make sure you are speaking loudly enough (above the silence threshold)
+   - Verify that the microphone works in other applications
+
+### Performance Tips
+
+- For better performance on lower-end hardware, reduce `max_workers` in config.yaml
+- Increase `silence_threshold` if the transcriber picks up too much background noise
+- Decrease `chunk_duration` for more responsive transcription (at the cost of higher CPU usage)
+
+## Technical Details
+
+The integration works as follows:
+
+1. The **Electron main process** spawns a Python child process running the Whisper transcriber
+2. The **Python process** captures audio from the microphone and runs it through the Whisper model
+3. **IPC communication** sends transcript data back to the Electron app as JSON over stdout
+4. The **renderer process** receives transcript events and updates the UI in real time
+5. **File I/O** saves the complete transcript when the meeting ends

+The system is designed to be resilient and will gracefully handle errors such as microphone access failures or model loading problems.
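+
+The stdout transport in step 3 is line-delimited JSON: the Python transcriber prints one JSON object per line, and the Node.js side buffers stdout and parses each complete line. The snippet below is a minimal sketch of the receiving side; the field names and the Python path are assumptions for illustration, and the actual message schema is defined by `whisper/transcriber_for_nodejs.py` and `src/services/whisperService.js`.
+
+```javascript
+const { spawn } = require('child_process');
+
+// Spawn the transcriber with the virtual-environment Python (Windows layout assumed).
+const py = spawn('whisper_env/Scripts/python.exe', ['whisper/transcriber_for_nodejs.py']);
+
+let buffered = '';
+py.stdout.on('data', (chunk) => {
+  buffered += chunk.toString();
+  // stdout chunks do not align with line boundaries, so split on newlines ourselves.
+  let nl;
+  while ((nl = buffered.indexOf('\n')) !== -1) {
+    const line = buffered.slice(0, nl).trim();
+    buffered = buffered.slice(nl + 1);
+    if (!line) continue;
+    try {
+      const msg = JSON.parse(line); // e.g. { "timestamp": "08:50:15", "text": "Hello, my name is..." }
+      console.log(`[${msg.timestamp}] ${msg.text}`);
+    } catch (err) {
+      console.error('Ignoring non-JSON line from transcriber:', line);
+    }
+  }
+});
+```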
\ No newline at end of file diff --git a/meetingnotes_2025-09-14T08-44-59-295Z.txt b/meetingnotes_2025-09-14T08-44-59-295Z.txt new file mode 100644 index 0000000..5898f22 --- /dev/null +++ b/meetingnotes_2025-09-14T08-44-59-295Z.txt @@ -0,0 +1,4 @@ +Meeting Session: meeting_20250914_084458 +Started: 2025-09-14 08:44:59 +============================================================ + diff --git a/meetingnotes_2025-09-14T08-47-47-141Z.txt b/meetingnotes_2025-09-14T08-47-47-141Z.txt new file mode 100644 index 0000000..8581e83 --- /dev/null +++ b/meetingnotes_2025-09-14T08-47-47-141Z.txt @@ -0,0 +1,4 @@ +Meeting Session: meeting_20250914_084746 +Started: 2025-09-14 08:47:47 +============================================================ + diff --git a/meetingnotes_2025-09-14T08-49-08-891Z.txt b/meetingnotes_2025-09-14T08-49-08-891Z.txt new file mode 100644 index 0000000..e070d7c --- /dev/null +++ b/meetingnotes_2025-09-14T08-49-08-891Z.txt @@ -0,0 +1,4 @@ +Meeting Session: meeting_20250914_084908 +Started: 2025-09-14 08:49:08 +============================================================ + diff --git a/meetingnotes_2025-09-14T08-49-42-449Z.txt b/meetingnotes_2025-09-14T08-49-42-449Z.txt new file mode 100644 index 0000000..9bc79e9 --- /dev/null +++ b/meetingnotes_2025-09-14T08-49-42-449Z.txt @@ -0,0 +1,4 @@ +Meeting Session: meeting_20250914_084941 +Started: 2025-09-14 08:49:42 +============================================================ + diff --git a/meetingnotes_2025-09-14T08-50-09-832Z.txt b/meetingnotes_2025-09-14T08-50-09-832Z.txt new file mode 100644 index 0000000..07ec9bd --- /dev/null +++ b/meetingnotes_2025-09-14T08-50-09-832Z.txt @@ -0,0 +1,28 @@ +Meeting Session: meeting_20250914_085009 +Started: 2025-09-14 08:50:09 +============================================================ + +[08:50:15]: , please. Hello, my name is Sean. +[08:50:19]: , I'm a Coron's two-roomed. +[08:50:26]: , finally why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is +[08:50:27]: , yeah, can you talk to me more about. +[08:50:31]: , but NYU Car Art, you know, I don't know anything much. +[08:50:35]: , thank you for your time. +[08:50:39]: the other one. +[08:50:43]: , please. +[08:50:47]: , you. +[08:50:51]: , it's not doing this, can you do one thing? +[08:50:55]: , you do this. Your data is getting... +[08:50:59]: , but I think that's what I want to look at. +[08:51:03]: , he's basically ever submitted in like now. He's not that good. +[08:51:07]: , you know we signed by just a minute. +[08:51:11]: the problem. Nobody +[08:51:15]: , and then we will remove the Ds store, all that stuff and then put it in like properly, you'll put it in. +[08:51:19]: , I'll keep the... +[08:51:23]: , and then make it like the first thing called the White Cash, all the random things. +[08:51:27]: , and ex-piles it. +[08:51:31]: , but it will come and emerge this time. +[08:51:35]: , but it's not the same. +[08:51:39]: , you can more job. Keep that way. Yeah. 
+[08:51:43]: , and we'll see you next time. +[08:51:47]: , okay. diff --git a/meetingnotes_2025-09-14T09-04-00-968Z.txt b/meetingnotes_2025-09-14T09-04-00-968Z.txt new file mode 100644 index 0000000..cba0588 --- /dev/null +++ b/meetingnotes_2025-09-14T09-04-00-968Z.txt @@ -0,0 +1,31 @@ +Meeting Session: meeting_20250914_090400 +Started: 2025-09-14 09:04:00 +============================================================ + +[09:04:08]: , this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this +[09:04:10]: , but it doesn't +[09:04:14]: , it is. +[09:04:18]: , thanks a lot. +[09:04:27]: , I'm not a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. +[09:04:27]: , with 26 letters and 10 numbers. +[09:04:30]: , but I also forgot to consider other characters. +[09:04:34]: the animation. +[09:04:38]: , like, at the eight hashtag dollar, or same sign. +[09:04:48]: the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character +[09:04:49]: , and I'm a bad guy here for two. +[09:04:50]: , and I'm so happy to be here. +[09:04:54]: , oh my god, this is such a terrible animal. +[09:04:59]: , okay. Do you want me to keep talking? Yes, yes. 
+[09:05:03]: , so let's keep talking. Let's talk about something go ahead and like which it can understand in like... +[09:05:06]: , yeah. Okay. Yeah. Um, +[09:05:10]: , usually is an AI powered event discovery social media platform. +[09:05:14]: , that addresses the loneliness epidemic. +[09:05:18]: , where 103 people from our generation suffer. +[09:05:22]: , where from chronic loneliness and we believe that the best +[09:05:26]: , and I think that's why I'm here. +[09:05:30]: the same thing. +[09:05:34]: , and when you register for the event. +[09:05:38]: , for an event, you're a match of what's so good. We then... +[09:05:42]: , so they can host a data. +[09:05:46]: , so that way we have an equal system. +[09:05:50]: , and businesses with the B2B and the B2B. diff --git a/meetingnotes_2025-09-14T11-34-49-983Z.txt b/meetingnotes_2025-09-14T11-34-49-983Z.txt new file mode 100644 index 0000000..a0b4029 --- /dev/null +++ b/meetingnotes_2025-09-14T11-34-49-983Z.txt @@ -0,0 +1,7 @@ +[04:34:02] , hello, hello, hello, hello. +[04:34:06] , what is happening. +[04:34:14] , hello. +[04:34:18] , where am I is this New York? +[04:34:24] the other side. What are you guys doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing +[04:34:26] , where are you sleeping so much wake up please? +[04:34:34] , how is it good morning? diff --git a/meetingnotes_2025-09-14T12-29-57-555Z.txt b/meetingnotes_2025-09-14T12-29-57-555Z.txt new file mode 100644 index 0000000..f2eded2 --- /dev/null +++ b/meetingnotes_2025-09-14T12-29-57-555Z.txt @@ -0,0 +1,2 @@ +[05:29:41] , hello, it's Al, how are you guys doing? +[05:29:49] , what is up? Is this New York? diff --git a/meetingnotes_2025-09-14T12-31-12-563Z.txt b/meetingnotes_2025-09-14T12-31-12-563Z.txt new file mode 100644 index 0000000..c594ac9 --- /dev/null +++ b/meetingnotes_2025-09-14T12-31-12-563Z.txt @@ -0,0 +1,4 @@ +[05:30:58] , hi hi hello this is new y'all +[05:31:02] , hello hello and see you. +[05:31:06] , and keep coming, and then keep coming. +[05:31:10] , keep coming in all that. diff --git a/meetingnotes_2025-09-14T13-26-56-089Z.txt b/meetingnotes_2025-09-14T13-26-56-089Z.txt new file mode 100644 index 0000000..2645850 --- /dev/null +++ b/meetingnotes_2025-09-14T13-26-56-089Z.txt @@ -0,0 +1,7 @@ +[06:26:29] , all it is okay with it. +[06:26:33] , all it is okay with that. +[06:26:37] , with it. Yeah, yeah. Oh. +[06:26:41] , what is this guy? I listen to it. +[06:26:45] , what was this? +[06:26:49] the person was done. +[06:26:53] , we should be very smart, you're not full. 
diff --git a/meetingnotes_2025-09-14T15-01-14-612Z.txt b/meetingnotes_2025-09-14T15-01-14-612Z.txt new file mode 100644 index 0000000..053f936 --- /dev/null +++ b/meetingnotes_2025-09-14T15-01-14-612Z.txt @@ -0,0 +1,9 @@ +[08:00:29] , hello, nor constant. +[08:00:33] , hello, hi nice. +[08:00:37] , okay thank you. I broke that out. The keywords are the +[08:00:41] , is broken. I would be ignored. +[08:00:45] , I am the best. +[08:00:49] , it's a message. +[08:00:53] , yeah. +[08:00:57] , oh we have grown one second so in the other end. +[08:01:12] , it's generally not only after it's done, it's no pressure to do it, it's a buffer. diff --git a/meetingnotes_2025-09-14T15-04-27-507Z.txt b/meetingnotes_2025-09-14T15-04-27-507Z.txt new file mode 100644 index 0000000..2648617 --- /dev/null +++ b/meetingnotes_2025-09-14T15-04-27-507Z.txt @@ -0,0 +1,6 @@ +[08:04:04] , and I'm going to work on this. +[08:04:08] , what do you think about that? +[08:04:13] , I'll go on his voice, but I'm going to work on this. +[08:04:16] the same thing. +[08:04:21] , and then we will get the best of you. And then, yeah, I'll tell you something more. +[08:04:24] the dot net devocable is going to be taking this time you know. diff --git a/meetingnotes_2025-09-14T16-05-55-698Z.txt b/meetingnotes_2025-09-14T16-05-55-698Z.txt new file mode 100644 index 0000000..6ca5dff --- /dev/null +++ b/meetingnotes_2025-09-14T16-05-55-698Z.txt @@ -0,0 +1,27 @@ +[09:04:08] , this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this +[09:04:10] , but it doesn't +[09:04:14] , it is. +[09:04:18] , thanks a lot. +[09:04:27] , I'm not a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. +[09:04:27] , with 26 letters and 10 numbers. +[09:04:30] , but I also forgot to consider other characters. +[09:04:34] the animation. +[09:04:38] , like, at the eight hashtag dollar, or same sign. 
+[09:04:48] the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character +[09:04:49] , and I'm a bad guy here for two. +[09:04:50] , and I'm so happy to be here. +[09:04:54] , oh my god, this is such a terrible animal. +[09:04:59] , okay. Do you want me to keep talking? Yes, yes. +[09:05:03] , so let's keep talking. Let's talk about something go ahead and like which it can understand in like... +[09:05:06] , yeah. Okay. Yeah. Um, +[09:05:10] , usually is an AI powered event discovery social media platform. +[09:05:14] , that addresses the loneliness epidemic. +[09:05:18] , where 103 people from our generation suffer. +[09:05:22] , where from chronic loneliness and we believe that the best +[09:05:26] , and I think that's why I'm here. +[09:05:30] the same thing. +[09:05:34] , and when you register for the event. +[09:05:38] , for an event, you're a match of what's so good. We then... +[09:05:42] , so they can host a data. +[09:05:46] , so that way we have an equal system. +[09:05:50] , and businesses with the B2B and the B2B. 
diff --git a/package-lock.json b/package-lock.json index 1f5a2e6..54bfe77 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,7 +14,6 @@ "node-record-lpcm16": "^1.0.1", "socket.io": "^4.7.2", "socket.io-client": "^4.7.2", - "speaker": "^0.5.4", "wav": "^1.0.2", "ws": "^8.14.2" }, @@ -1975,12 +1974,12 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "24.3.3", - "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.3.tgz", - "integrity": "sha512-GKBNHjoNw3Kra1Qg5UXttsY5kiWMEfoHq2TmXb+b1rcm6N7B3wTrFYIf/oSZ1xNQ+hVVijgLkiDZh7jRRsh+Gw==", + "version": "24.4.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.4.0.tgz", + "integrity": "sha512-gUuVEAK4/u6F9wRLznPUU4WGUacSEBDPoC2TrBkw3GAnOLHBL45QdfHOXp1kJ4ypBGLxTOB+t7NJLpKoC3gznQ==", "license": "MIT", "dependencies": { - "undici-types": "~7.10.0" + "undici-types": "~7.11.0" } }, "node_modules/@types/phoenix": { @@ -2638,15 +2637,6 @@ "baseline-browser-mapping": "dist/cli.js" } }, - "node_modules/bindings": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", - "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", - "license": "MIT", - "dependencies": { - "file-uri-to-path": "1.0.0" - } - }, "node_modules/bl": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", @@ -3372,6 +3362,7 @@ "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, "license": "MIT", "dependencies": { "ms": "^2.1.3" @@ -4538,12 +4529,6 @@ "node": "^10.12.0 || >=12.0.0" } }, - "node_modules/file-uri-to-path": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", - "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", - "license": "MIT" - }, "node_modules/filelist": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.4.tgz", @@ -7826,21 +7811,6 @@ "source-map": "^0.6.0" } }, - "node_modules/speaker": { - "version": "0.5.5", - "resolved": "https://registry.npmjs.org/speaker/-/speaker-0.5.5.tgz", - "integrity": "sha512-IBeMZUITigYBO139h0+1MAgBHNZF55GFJN4U/Box35Sg49cfqYkbCO92TXoCUy22Ast08zfqKuXLvPxq9CWwLw==", - "hasInstallScript": true, - "license": "(MIT AND LGPL-2.1-only)", - "dependencies": { - "bindings": "^1.3.0", - "buffer-alloc": "^1.1.0", - "debug": "^4.0.0" - }, - "engines": { - "node": ">=8.6" - } - }, "node_modules/sprintf-js": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", @@ -8299,9 +8269,9 @@ } }, "node_modules/undici-types": { - "version": "7.10.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz", - "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==", + "version": "7.11.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.11.0.tgz", + "integrity": "sha512-kt1ZriHTi7MU+Z/r9DOdAI3ONdaR3M3csEaRc6ewa4f4dTvX4cQCbJ4NkEn0ohE4hHtq85+PhPSTY+pO/1PwgA==", "license": "MIT" }, "node_modules/universalify": { diff --git a/setup_whisper.bat b/setup_whisper.bat new file mode 100644 index 0000000..497d99b --- /dev/null +++ b/setup_whisper.bat @@ -0,0 +1,35 @@ +@echo off +echo Setting up Whisper AI transcription for ScrumAI... 
+ +REM Check if Python is installed +python --version >nul 2>&1 +if %errorlevel% neq 0 ( + echo Python is not installed or not in PATH + echo Please install Python 3.8+ and try again + pause + exit /b 1 +) + +echo Python found! + +REM Create virtual environment for Whisper +echo Creating Python virtual environment... +python -m venv whisper_env + +REM Activate virtual environment +echo Activating virtual environment... +call whisper_env\Scripts\activate.bat + +REM Install minimal requirements +echo Installing Python dependencies... +pip install -r whisper\requirements_minimal.txt + +echo. +echo Whisper setup complete! +echo. +echo To test the integration: +echo 1. Run 'npm start' to start the ScrumAI application +echo 2. Click 'Start Meeting' to begin transcription +echo 3. Speak into your microphone and watch the transcript appear +echo. +pause \ No newline at end of file diff --git a/src/electron/main.js b/src/electron/main.js index e043eda..c1916c5 100644 --- a/src/electron/main.js +++ b/src/electron/main.js @@ -12,10 +12,16 @@ const { app, BrowserWindow, ipcMain } = require('electron'); const path = require('path'); const https = require('https'); +const WhisperService = require('../services/whisperService'); +const ChatbotService = require('../services/chatbotService'); // Keep a global reference of the window object let mainWindow; +// Service instances +let whisperService; +let chatbotService; + // GitHub Integration Configuration const GITHUB_CONFIG = { token: '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', @@ -64,15 +70,189 @@ function createWindow() { * IPC Event Handlers */ +// Initialize Whisper service +function initializeWhisperService() { + whisperService = new WhisperService(); + + // Set up transcript callback to send to renderer + whisperService.onTranscript((data) => { + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('whisper-transcript', data); + } + + // Update chatbot with latest transcript file if available + if (chatbotService && data.transcriptFile) { + chatbotService.setLiveTranscriptFile(data.transcriptFile); + } + }); + + // Set up error callback + whisperService.onError((error) => { + console.error('Whisper error:', error); + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('whisper-error', error); + } + }); + + // Set up status callback + whisperService.onStatus((status) => { + console.log('Whisper status:', status.message); + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('whisper-status', status); + } + }); +} + // Handle meeting start/stop events ipcMain.handle('start-meeting', async (event, meetingData) => { console.log('Starting meeting:', meetingData); - return { success: true, meetingId: Date.now() }; + + try { + if (!whisperService) { + initializeWhisperService(); + } + + await whisperService.start(); + console.log('Whisper transcription started'); + + return { success: true, meetingId: Date.now() }; + } catch (error) { + console.error('Failed to start Whisper transcription:', error); + return { success: false, error: error.message }; + } }); ipcMain.handle('stop-meeting', async (event) => { console.log('Stopping meeting'); - return { success: true }; + + try { + if (whisperService && whisperService.isServiceRunning()) { + await whisperService.stop(); + console.log('Whisper transcription stopped'); + } + + return { success: true }; + } catch (error) { + console.error('Failed to stop Whisper transcription:', error); + return { success: false, error: error.message }; + } 
+}); + +// Chatbot Service IPC Handlers +ipcMain.handle('initialize-chatbot', async (event) => { + console.log('Initializing chatbot service'); + + try { + if (!chatbotService) { + chatbotService = new ChatbotService(); + + // Set up chatbot event handlers + chatbotService.setOnResponseCallback((response, isStreaming) => { + if (mainWindow) { + mainWindow.webContents.send('chatbot-response', { response, isStreaming }); + } + }); + + chatbotService.setOnErrorCallback((error) => { + if (mainWindow) { + mainWindow.webContents.send('chatbot-error', error.message); + } + }); + + chatbotService.setOnStatusCallback((status) => { + if (mainWindow) { + mainWindow.webContents.send('chatbot-status', status); + } + }); + } + + return { success: true }; + } catch (error) { + console.error('Failed to initialize chatbot service:', error); + return { success: false, error: error.message }; + } +}); + +ipcMain.handle('start-chatbot', async (event) => { + console.log('Starting chatbot service'); + + try { + if (!chatbotService) { + return { success: false, error: 'Chatbot service not initialized' }; + } + + await chatbotService.start(); + console.log('Chatbot service started'); + + return { success: true }; + } catch (error) { + console.error('Failed to start chatbot service:', error); + return { success: false, error: error.message }; + } +}); + +ipcMain.handle('stop-chatbot', async (event) => { + console.log('Stopping chatbot service'); + + try { + if (chatbotService) { + await chatbotService.stop(); + console.log('Chatbot service stopped'); + } + + return { success: true }; + } catch (error) { + console.error('Failed to stop chatbot service:', error); + return { success: false, error: error.message }; + } +}); + +ipcMain.handle('send-chat-message', async (event, message) => { + console.log('Sending chat message:', message); + + try { + if (!chatbotService) { + return { success: false, error: 'Chatbot service not initialized' }; + } + + const response = await chatbotService.sendMessage(message); + return { success: true, data: response }; + } catch (error) { + console.error('Failed to send chat message:', error); + return { success: false, error: error.message }; + } +}); + +// Handle transcript export +ipcMain.handle('save-transcript', async (event, filename) => { + console.log('Saving transcript:', filename); + + try { + if (!whisperService) { + throw new Error('No transcript available - meeting not started'); + } + + const filepath = await whisperService.saveTranscript(filename); + return { success: true, filepath }; + } catch (error) { + console.error('Failed to save transcript:', error); + return { success: false, error: error.message }; + } +}); + +// Handle get full transcript +ipcMain.handle('get-full-transcript', async (event) => { + try { + if (!whisperService) { + return { success: false, error: 'No transcript available - meeting not started' }; + } + + const transcript = whisperService.getFullTranscript(); + return { success: true, transcript }; + } catch (error) { + console.error('Failed to get transcript:', error); + return { success: false, error: error.message }; + } }); // GitHub issue creation handler @@ -170,7 +350,16 @@ app.whenReady().then(() => { }); // Quit when all windows are closed -app.on('window-all-closed', () => { +app.on('window-all-closed', async () => { + // Clean up Whisper service + if (whisperService && whisperService.isServiceRunning()) { + try { + await whisperService.stop(); + } catch (error) { + console.error('Error stopping Whisper service on app quit:', error); + 
} + } + // On macOS, keep app running even when all windows are closed if (process.platform !== 'darwin') { app.quit(); diff --git a/src/electron/preload.js b/src/electron/preload.js index b0fac15..9935af7 100644 --- a/src/electron/preload.js +++ b/src/electron/preload.js @@ -13,6 +13,29 @@ contextBridge.exposeInMainWorld('electronAPI', { // Meeting management startMeeting: (meetingData) => ipcRenderer.invoke('start-meeting', meetingData), stopMeeting: () => ipcRenderer.invoke('stop-meeting'), + + // Transcript management + saveTranscript: (filename) => ipcRenderer.invoke('save-transcript', filename), + getFullTranscript: () => ipcRenderer.invoke('get-full-transcript'), + + // Event listeners for Whisper events + onWhisperTranscript: (callback) => ipcRenderer.on('whisper-transcript', callback), + onWhisperError: (callback) => ipcRenderer.on('whisper-error', callback), + onWhisperStatus: (callback) => ipcRenderer.on('whisper-status', callback), + + // Chatbot management + initializeChatbot: () => ipcRenderer.invoke('initialize-chatbot'), + startChatbot: () => ipcRenderer.invoke('start-chatbot'), + stopChatbot: () => ipcRenderer.invoke('stop-chatbot'), + sendChatMessage: (message) => ipcRenderer.invoke('send-chat-message', message), + + // Event listeners for Chatbot events + onChatbotResponse: (callback) => ipcRenderer.on('chatbot-response', callback), + onChatbotError: (callback) => ipcRenderer.on('chatbot-error', callback), + onChatbotStatus: (callback) => ipcRenderer.on('chatbot-status', callback), + + // Remove event listeners + removeAllListeners: (channel) => ipcRenderer.removeAllListeners(channel), // GitHub integration createGithubIssue: (issueData) => ipcRenderer.invoke('create-github-issue', issueData), diff --git a/src/renderer/index.html b/src/renderer/index.html index 20c8465..6278963 100644 --- a/src/renderer/index.html +++ b/src/renderer/index.html @@ -19,6 +19,7 @@ + @@ -76,6 +77,10 @@

ScrumAI Meeting Assistant

📝 Transcript + + + +
+
+
+

AI Chat Assistant

+
+ + +
+
+
+

Start the chat to begin conversation with AI...

+
+
+ + +
+
+
@@ -174,6 +208,25 @@

🎯 Strategy & Action Items

+ + +
+
+
+
+

💬 AnythingLLM Assistant

+

Ask questions about your meeting or anything else!

+
+
+
+ + +
+
+ Disconnected +
+
+
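The chat panel added to index.html has to contain the elements that chat-controller.js looks up by id (`chat-messages`, `chat-input`, `chat-send`, `chat-status-text`) and the classes styled by chat.css. The following is a minimal sketch of that markup, assuming only those ids, classes, and the visible strings; the exact structure and attributes in index.html may differ:

```html
<!-- Sketch only: ids and classes taken from chat-controller.js and chat.css. -->
<div class="chat-container">
  <div class="chat-header">
    <h3>AI Chat Assistant</h3>
    <div class="chat-controls"></div>
  </div>
  <div class="chat-messages" id="chat-messages">
    <div class="chat-welcome">
      <p>💬 AnythingLLM Assistant</p>
      <p class="chat-subtitle">Ask questions about your meeting or anything else!</p>
    </div>
  </div>
  <div class="chat-input-container">
    <input type="text" id="chat-input" class="chat-input" placeholder="Type a message..." />
    <button id="chat-send" class="chat-send-btn">Send</button>
  </div>
  <div class="chat-status">
    <span id="chat-status-text" class="chat-status-text disconnected">Disconnected</span>
  </div>
</div>
```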
diff --git a/src/renderer/js/chat-controller.js b/src/renderer/js/chat-controller.js new file mode 100644 index 0000000..1947181 --- /dev/null +++ b/src/renderer/js/chat-controller.js @@ -0,0 +1,290 @@ +/** + * Chat Controller + * + * Handles the AnythingLLM chat interface and communication with the chatbot service + */ + +class ChatController { + constructor() { + this.isInitialized = false; + this.chatbotService = null; + this.chatMessages = null; + this.chatInput = null; + this.chatSendBtn = null; + this.chatStatus = null; + this.messageHistory = []; + this.isConnected = false; + this.isConnecting = false; + } + + /** + * Initialize the chat controller + */ + async init() { + try { + console.log('Initializing chat controller...'); + + // Get DOM elements + this.chatMessages = document.getElementById('chat-messages'); + this.chatInput = document.getElementById('chat-input'); + this.chatSendBtn = document.getElementById('chat-send'); + this.chatStatus = document.getElementById('chat-status-text'); + + if (!this.chatMessages || !this.chatInput || !this.chatSendBtn || !this.chatStatus) { + throw new Error('Chat DOM elements not found'); + } + + // Set up event listeners + this.setupEventListeners(); + + // Initialize chatbot service connection + await this.initializeChatbotService(); + + // Start chatbot service after initialization + await this.connectChatbotService(); + + this.isInitialized = true; + console.log('Chat controller initialized successfully'); + + } catch (error) { + console.error('Failed to initialize chat controller:', error); + this.updateStatus('error', 'Failed to initialize chat'); + } + } + + /** + * Set up event listeners + */ + setupEventListeners() { + // Send button click + this.chatSendBtn.addEventListener('click', () => { + this.sendMessage(); + }); + + // Enter key in input + this.chatInput.addEventListener('keypress', (e) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault(); + this.sendMessage(); + } + }); + + // Input change to enable/disable send button + this.chatInput.addEventListener('input', () => { + const hasText = this.chatInput.value.trim().length > 0; + this.chatSendBtn.disabled = !hasText || !this.isConnected; + }); + + // Listen for app events + document.addEventListener('app-initialized', () => { + console.log('App initialized, connecting chatbot service...'); + this.connectChatbotService(); + }); + } + + /** + * Initialize chatbot service + */ + async initializeChatbotService() { + try { + this.updateStatus('connecting', 'Initializing chatbot...'); + + // Initialize chatbot service via IPC + const result = await window.electronAPI?.initializeChatbot(); + + if (result && result.success) { + this.updateStatus('connected', 'Connected'); + this.isConnected = true; + this.chatSendBtn.disabled = this.chatInput.value.trim().length === 0; + } else { + throw new Error(result?.error || 'Failed to initialize chatbot'); + } + + } catch (error) { + console.error('Chatbot service initialization failed:', error); + this.updateStatus('error', 'Connection failed'); + this.isConnected = false; + this.chatSendBtn.disabled = true; + } + } + + /** + * Connect to chatbot service + */ + async connectChatbotService() { + if (this.isConnecting || this.isConnected) { + return; + } + + try { + this.isConnecting = true; + this.updateStatus('connecting', 'Connecting...'); + + const result = await window.electronAPI?.startChatbot(); + + if (result && result.success) { + this.updateStatus('connected', 'Connected'); + this.isConnected = true; + 
this.chatSendBtn.disabled = this.chatInput.value.trim().length === 0; + } else { + throw new Error(result?.error || 'Failed to start chatbot'); + } + + } catch (error) { + console.error('Chatbot connection failed:', error); + this.updateStatus('error', 'Connection failed'); + this.isConnected = false; + this.chatSendBtn.disabled = true; + } finally { + this.isConnecting = false; + } + } + + /** + * Send a message to the chatbot + */ + async sendMessage() { + const message = this.chatInput.value.trim(); + if (!message || !this.isConnected) { + return; + } + + try { + // Clear input and disable button + this.chatInput.value = ''; + this.chatSendBtn.disabled = true; + + // Add user message to UI + this.addMessage(message, 'user'); + + // Add loading message + const loadingId = this.addMessage('...', 'assistant', true); + + // Send to chatbot service + const response = await window.electronAPI?.sendChatMessage(message); + + // Remove loading message + this.removeMessage(loadingId); + + if (response && response.success) { + this.addMessage(response.data, 'assistant'); + } else { + this.addMessage('Sorry, I encountered an error processing your message.', 'error'); + } + + } catch (error) { + console.error('Failed to send message:', error); + this.addMessage('Sorry, I encountered an error processing your message.', 'error'); + } finally { + this.chatSendBtn.disabled = this.chatInput.value.trim().length === 0 || !this.isConnected; + } + } + + /** + * Add a message to the chat interface + */ + addMessage(text, type = 'assistant', isLoading = false) { + const messageId = Date.now() + '_' + Math.random(); + const messageEl = document.createElement('div'); + messageEl.className = `chat-message ${type}`; + messageEl.setAttribute('data-message-id', messageId); + + if (isLoading) { + messageEl.classList.add('streaming'); + } + + messageEl.textContent = text; + + // Remove welcome message if it exists + const welcomeEl = this.chatMessages.querySelector('.chat-welcome'); + if (welcomeEl) { + welcomeEl.remove(); + } + + this.chatMessages.appendChild(messageEl); + this.scrollToBottom(); + + // Store in history + this.messageHistory.push({ + id: messageId, + text, + type, + timestamp: new Date() + }); + + return messageId; + } + + /** + * Remove a message from the chat interface + */ + removeMessage(messageId) { + const messageEl = this.chatMessages.querySelector(`[data-message-id="${messageId}"]`); + if (messageEl) { + messageEl.remove(); + } + + // Remove from history + this.messageHistory = this.messageHistory.filter(msg => msg.id !== messageId); + } + + /** + * Update chat status + */ + updateStatus(status, message) { + if (!this.chatStatus) return; + + this.chatStatus.textContent = message; + this.chatStatus.className = `chat-status-text ${status}`; + } + + /** + * Scroll chat to bottom + */ + scrollToBottom() { + if (this.chatMessages) { + this.chatMessages.scrollTop = this.chatMessages.scrollHeight; + } + } + + /** + * Clear chat messages + */ + clearMessages() { + if (this.chatMessages) { + this.chatMessages.innerHTML = ` +
+                <div class="chat-welcome">
+                    <p>💬 AnythingLLM Assistant</p>
+                    <p class="chat-subtitle">Ask questions about your meeting or anything else!</p>
+                </div>
+ `; + } + this.messageHistory = []; + } + + /** + * Get chat status + */ + getStatus() { + return { + isInitialized: this.isInitialized, + isConnected: this.isConnected, + isConnecting: this.isConnecting, + messageCount: this.messageHistory.length + }; + } + + /** + * Cleanup + */ + destroy() { + this.isInitialized = false; + this.isConnected = false; + this.isConnecting = false; + this.messageHistory = []; + + // Remove event listeners would go here if needed + } +} + +export { ChatController }; \ No newline at end of file diff --git a/src/renderer/js/main.js b/src/renderer/js/main.js index bb94d77..0c8dbd8 100644 --- a/src/renderer/js/main.js +++ b/src/renderer/js/main.js @@ -10,6 +10,7 @@ */ import { UIController } from './ui-controller.js'; +import { ChatController } from './chat-controller.js'; import { notionIntegration } from './notion-integration.js'; class ScrumAIApp { @@ -18,9 +19,10 @@ class ScrumAIApp { this.currentMeeting = null; this.meetingStartTime = null; this.meetingTimer = null; - + // Component instances this.uiController = null; + this.chatController = null; this.notionIntegration = notionIntegration; // Notion API credentials @@ -28,7 +30,6 @@ class ScrumAIApp { apiKey: "ntn_b21836603815tHxhJd8M44AeLsp2bAHgpqbmNVnlMaE3Sg", parentPageId: "26e08228035d805ca45ac47eac1b3849" }; - // Application state this.state = { isMeetingActive: false, @@ -37,6 +38,10 @@ class ScrumAIApp { activeTab: 'keywords', isPostMeeting: false }; + + // Whisper integration state + this.transcriptData = []; + this.isTranscribing = false; } /** @@ -51,12 +56,18 @@ class ScrumAIApp { // Set up event listeners this.setupEventListeners(); - + + // Setup Whisper event listeners + this.setupWhisperEventListeners(); + // Initialize UI state this.initializeUI(); this.isInitialized = true; console.log('ScrumAI Meeting Assistant initialized successfully'); + + // Dispatch app initialization event for chat controller + document.dispatchEvent(new CustomEvent('app-initialized')); } catch (error) { console.error('Failed to initialize application:', error); @@ -71,6 +82,10 @@ class ScrumAIApp { // Initialize UI controller this.uiController = new UIController(); this.uiController.init(); + + // Initialize chat controller + this.chatController = new ChatController(); + await this.chatController.init(); // Initialize Notion integration try { @@ -84,8 +99,9 @@ class ScrumAIApp { // Continue without Notion integration } - // Make UI controller globally accessible for keyword tooltips + // Make controllers globally accessible window.uiController = this.uiController; + window.chatController = this.chatController; } /** @@ -158,6 +174,30 @@ class ScrumAIApp { }); } + /** + * Setup Whisper event listeners for real-time transcription + */ + setupWhisperEventListeners() { + if (typeof window.electronAPI !== 'undefined') { + // Listen for transcript data + window.electronAPI.onWhisperTranscript((event, data) => { + this.handleWhisperTranscript(data); + }); + + // Listen for errors + window.electronAPI.onWhisperError((event, error) => { + console.error('Whisper error:', error); + this.handleWhisperError(error); + }); + + // Listen for status updates + window.electronAPI.onWhisperStatus((event, status) => { + console.log('Whisper status:', status.message); + this.handleWhisperStatus(status); + }); + } + } + /** * Initialize UI state */ @@ -171,43 +211,51 @@ class ScrumAIApp { async startMeeting() { try { console.log('Starting meeting...'); - + // Create meeting data const meetingData = { title: `Meeting ${new 
Date().toLocaleString()}`, startTime: new Date().toISOString(), participants: ['Demo User'] }; - + console.log('Meeting data:', meetingData); - + + // Start Whisper transcription via Electron API + if (typeof window.electronAPI !== 'undefined') { + const result = await window.electronAPI.startMeeting(meetingData); + if (!result.success) { + throw new Error(result.error || 'Failed to start Whisper transcription'); + } + console.log('Whisper transcription started successfully'); + this.isTranscribing = true; + } + this.currentMeeting = meetingData; this.meetingStartTime = Date.now(); this.state.isMeetingActive = true; + this.transcriptData = []; // Reset to live meeting state if coming from post-meeting if (this.state.isPostMeeting) { this.resetToLiveMeeting(); } - // Update UI this.updateMeetingStatus(true); this.startMeetingTimer(); - + // Initialize mock participants for UI development this.initializeMockParticipants(); - - // Initialize mock keywords for UI development - this.uiController.initializeMockKeywords(); - - // Initialize mock transcript - this.initializeMockTranscript(); - + + // Clear previous content + this.clearTranscriptContent(); + console.log('Meeting started successfully'); - + } catch (error) { console.error('Failed to start meeting:', error); this.handleError('Failed to start meeting', error); + this.isTranscribing = false; } } @@ -217,33 +265,60 @@ class ScrumAIApp { async stopMeeting() { try { console.log('Stopping meeting...'); - + if (!this.state.isMeetingActive) return; - + + // Stop Whisper transcription via Electron API + if (typeof window.electronAPI !== 'undefined' && this.isTranscribing) { + const result = await window.electronAPI.stopMeeting(); + if (!result.success) { + console.error('Failed to stop Whisper transcription:', result.error); + } else { + console.log('Whisper transcription stopped successfully'); + } + this.isTranscribing = false; + + // Save transcript to meetingnotes file + if (this.transcriptData.length > 0) { + try { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const filename = `meetingnotes_${timestamp}.txt`; + const saveResult = await window.electronAPI.saveTranscript(filename); + if (saveResult.success) { + console.log('Meeting transcript saved to:', saveResult.filepath); + alert(`Meeting notes saved to: ${saveResult.filepath}`); + } else { + console.error('Failed to save transcript:', saveResult.error); + } + } catch (saveError) { + console.error('Error saving transcript:', saveError); + } + } + } + // Update meeting end time if (this.currentMeeting) { this.currentMeeting.endTime = new Date().toISOString(); this.currentMeeting.duration = Date.now() - this.meetingStartTime; } - + // Reset state this.state.isMeetingActive = false; this.currentMeeting = null; this.meetingStartTime = null; this.state.participants = []; - + // Clear participants from UI this.uiController.clearParticipants(); - + // Update UI this.updateMeetingStatus(false); this.stopMeetingTimer(); // Transition to post-meeting state this.transitionToPostMeeting(); - console.log('Meeting stopped successfully'); - + } catch (error) { console.error('Failed to stop meeting:', error); this.handleError('Failed to stop meeting', error); @@ -759,6 +834,88 @@ class ScrumAIApp { }, 5000); } + /** + * Handle Whisper transcript data + */ + handleWhisperTranscript(data) { + console.log('Received Whisper transcript:', data); + + // Store transcript data + this.transcriptData.push(data); + + // Add to UI + this.addTranscriptToUI(data); + + // Extract keywords 
from the transcript
+        if (data.text && data.text.trim()) {
+            this.uiController.addTranscription(data.text);
+        }
+    }
+
+    /**
+     * Handle Whisper errors
+     */
+    handleWhisperError(error) {
+        console.error('Whisper error in UI:', error);
+        // You could show an error notification here
+    }
+
+    /**
+     * Handle Whisper status updates
+     */
+    handleWhisperStatus(status) {
+        console.log('Whisper status update:', status.message);
+        // You could show status updates in the UI here
+    }
+
+    /**
+     * Add transcript entry to UI
+     */
+    addTranscriptToUI(data) {
+        const transcriptContent = document.getElementById('transcript-content');
+        if (!transcriptContent) return;
+
+        // Create transcript entry
+        const entry = document.createElement('div');
+        entry.className = 'transcript-entry';
+        entry.innerHTML = `
+            <span class="transcript-timestamp">[${data.timestamp}]</span>
+            <span class="transcript-text">${this.escapeHtml(data.text)}</span>
+        `;
+
+        // Remove placeholder text if it exists
+        const placeholder = transcriptContent.querySelector('.placeholder-text');
+        if (placeholder) {
+            placeholder.remove();
+        }
+
+        // Add new entry
+        transcriptContent.appendChild(entry);
+
+        // Scroll to bottom
+        transcriptContent.scrollTop = transcriptContent.scrollHeight;
+    }
+
+    /**
+     * Clear transcript content
+     */
+    clearTranscriptContent() {
+        const transcriptContent = document.getElementById('transcript-content');
+        if (transcriptContent) {
+            transcriptContent.innerHTML = '<p class="placeholder-text">Start a meeting to see transcript...</p>
'; + } + this.transcriptData = []; + } + + /** + * Escape HTML to prevent XSS + */ + escapeHtml(text) { + const div = document.createElement('div'); + div.textContent = text; + return div.innerHTML; + } + /** * Initialize mock transcript for UI development */ diff --git a/src/renderer/styles/chat.css b/src/renderer/styles/chat.css new file mode 100644 index 0000000..2a4b08f --- /dev/null +++ b/src/renderer/styles/chat.css @@ -0,0 +1,209 @@ +/* Chat Tab Styles */ +.chat-container { + display: flex; + flex-direction: column; + height: 100%; + min-height: 400px; +} + +.chat-messages { + flex: 1; + overflow-y: auto; + padding: 1rem; + background: var(--bg-secondary); + border-radius: 8px; + margin-bottom: 1rem; + max-height: 400px; + border: 1px solid var(--border-primary); +} + +.chat-welcome { + text-align: center; + color: var(--text-secondary); + padding: 2rem; +} + +.chat-welcome p:first-child { + font-size: 1.2rem; + font-weight: 600; + color: var(--text-primary); + margin-bottom: 0.5rem; +} + +.chat-subtitle { + font-size: 0.9rem; + margin: 0; +} + +.chat-message { + margin-bottom: 1rem; + padding: 0.75rem; + border-radius: 8px; + max-width: 85%; + word-wrap: break-word; +} + +.chat-message.user { + background: var(--primary-color); + color: white; + margin-left: auto; + text-align: right; +} + +.chat-message.assistant { + background: var(--bg-tertiary); + color: var(--text-primary); + border: 1px solid var(--border-primary); +} + +.chat-message.error { + background: #fee; + color: #c33; + border: 1px solid #fcc; +} + +.chat-message.streaming { + background: var(--bg-tertiary); + color: var(--text-primary); + border: 1px solid var(--primary-color); + position: relative; +} + +.chat-message.streaming::after { + content: '●'; + animation: pulse 1.5s ease-in-out infinite; + color: var(--primary-color); + margin-left: 0.5rem; +} + +@keyframes pulse { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.3; } +} + +.chat-input-container { + display: flex; + gap: 0.5rem; + padding: 0.75rem; + background: var(--bg-primary); + border: 1px solid var(--border-primary); + border-radius: 8px; +} + +.chat-input { + flex: 1; + padding: 0.75rem; + border: 1px solid var(--border-secondary); + border-radius: 6px; + background: var(--bg-secondary); + color: var(--text-primary); + font-size: 0.9rem; + outline: none; + transition: border-color 0.2s ease; +} + +.chat-input:focus { + border-color: var(--primary-color); + box-shadow: 0 0 0 2px rgba(var(--primary-color-rgb), 0.1); +} + +.chat-input:disabled { + background: var(--bg-disabled); + color: var(--text-disabled); + cursor: not-allowed; +} + +.chat-send-btn { + padding: 0.75rem 1.5rem; + background: var(--primary-color); + color: white; + border: none; + border-radius: 6px; + font-weight: 500; + cursor: pointer; + transition: all 0.2s ease; + white-space: nowrap; +} + +.chat-send-btn:hover:not(:disabled) { + background: var(--primary-hover); + transform: translateY(-1px); +} + +.chat-send-btn:disabled { + background: var(--bg-disabled); + color: var(--text-disabled); + cursor: not-allowed; + transform: none; +} + +.chat-status { + padding: 0.5rem; + text-align: center; + margin-top: 0.5rem; +} + +.chat-status-text { + font-size: 0.8rem; + padding: 0.25rem 0.75rem; + border-radius: 12px; + display: inline-block; +} + +.chat-status-text.connected { + background: #d4edda; + color: #155724; + border: 1px solid #c3e6cb; +} + +.chat-status-text.connecting { + background: #fff3cd; + color: #856404; + border: 1px solid #ffeaa7; +} + +.chat-status-text.disconnected { 
+ background: #f8d7da; + color: #721c24; + border: 1px solid #f5c6cb; +} + +.chat-status-text.error { + background: #f8d7da; + color: #721c24; + border: 1px solid #f5c6cb; +} + +/* Scrollbar Styles for Chat Messages */ +.chat-messages::-webkit-scrollbar { + width: 6px; +} + +.chat-messages::-webkit-scrollbar-track { + background: var(--bg-primary); + border-radius: 3px; +} + +.chat-messages::-webkit-scrollbar-thumb { + background: var(--border-secondary); + border-radius: 3px; +} + +.chat-messages::-webkit-scrollbar-thumb:hover { + background: var(--border-primary); +} + +/* Responsive Adjustments */ +@media (max-width: 768px) { + .chat-message { + max-width: 95%; + } + + .chat-input-container { + padding: 0.5rem; + } + + .chat-send-btn { + padding: 0.75rem 1rem; + } +} \ No newline at end of file diff --git a/src/renderer/styles/components.css b/src/renderer/styles/components.css index 261e2f2..e0beb6c 100644 --- a/src/renderer/styles/components.css +++ b/src/renderer/styles/components.css @@ -7,6 +7,70 @@ /* Additional component styles can be added here as needed */ +/* Transcript Entry Styles */ +.transcript-entry { + display: flex; + flex-direction: column; + padding: 8px 12px; + margin-bottom: 8px; + background: #1a1a1a; + border-radius: 6px; + border-left: 3px solid #22d3ee; + transition: background-color 0.2s ease; +} + +.transcript-entry:hover { + background: #242424; +} + +.transcript-timestamp { + font-size: 11px; + color: #9ca3af; + font-family: 'JetBrains Mono', monospace; + margin-bottom: 4px; +} + +.transcript-text { + color: #e5e7eb; + line-height: 1.4; + font-size: 13px; +} + +/* Transcript Content Container */ +.transcript-content { + max-height: 400px; + overflow-y: auto; + padding: 16px; + background: #111; + border-radius: 8px; + border: 1px solid #333; +} + +.transcript-content:empty::before { + content: "Transcripts will appear here when the meeting starts..."; + color: #6b7280; + font-style: italic; +} + +/* Custom scrollbar for transcript content */ +.transcript-content::-webkit-scrollbar { + width: 6px; +} + +.transcript-content::-webkit-scrollbar-track { + background: #1f1f1f; + border-radius: 3px; +} + +.transcript-content::-webkit-scrollbar-thumb { + background: #404040; + border-radius: 3px; +} + +.transcript-content::-webkit-scrollbar-thumb:hover { + background: #525252; +} + /* Focus states for accessibility */ .control-btn:focus, .tab-btn:focus { diff --git a/src/renderer/styles/main.css b/src/renderer/styles/main.css index 36591fe..4e861f3 100644 --- a/src/renderer/styles/main.css +++ b/src/renderer/styles/main.css @@ -320,8 +320,12 @@ html, body { border: none; background: transparent; width: auto; - min-width: auto; + min-width: 0; max-width: none; + display: flex; + align-items: center; + justify-content: center; + text-align: center; } /* Post-meeting tabs get special styling */ @@ -373,6 +377,8 @@ html, body { .tab-icon { font-size: 16px; + flex-shrink: 0; + margin-right: 4px; } .tab-count { @@ -384,6 +390,8 @@ html, body { font-weight: 600; min-width: 18px; text-align: center; + flex-shrink: 0; + margin-left: 4px; } .tab-btn.active .tab-count { @@ -1612,6 +1620,93 @@ html, body { } } +/* Chat Tab Styles */ +.chat-container { + display: flex; + flex-direction: column; + height: 100%; + padding: 12px; +} + +.chat-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 16px; + padding-bottom: 12px; + border-bottom: 1px solid #333; +} + +.chat-header h3 { + font-size: 18px; + font-weight: 600; + color: #ffffff; 
+ margin: 0; +} + +.chat-controls { + display: flex; + gap: 8px; +} + +.chat-messages { + flex: 1; + overflow-y: auto; + padding: 12px; + background: #111; + border: 1px solid #333; + border-radius: 6px; + margin-bottom: 12px; + max-height: 400px; + scrollbar-width: thin; + scrollbar-color: #374151 #1a1a1a; +} + +.chat-input-container { + display: flex; + gap: 8px; + align-items: center; +} + +#chat-input { + flex: 1; + padding: 10px 12px; + background: #111; + border: 1px solid #333; + border-radius: 6px; + color: #ffffff; + font-size: 14px; +} + +#chat-input:focus { + outline: none; + border-color: #3b82f6; + box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.3); +} + +#chat-input:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.chat-messages::-webkit-scrollbar { + width: 6px; +} + +.chat-messages::-webkit-scrollbar-track { + background: #1a1a1a; + border-radius: 3px; +} + +.chat-messages::-webkit-scrollbar-thumb { + background: #374151; + border-radius: 3px; +} + +.chat-messages::-webkit-scrollbar-thumb:hover { + background: #4b5563; +} + /* Scrollbar styling */ ::-webkit-scrollbar { width: 6px; diff --git a/src/services/chatbotService.js b/src/services/chatbotService.js new file mode 100644 index 0000000..9635bcb --- /dev/null +++ b/src/services/chatbotService.js @@ -0,0 +1,372 @@ +/** + * AnythingLLM Chatbot Service + * + * This service manages the AnythingLLM chatbot process by spawning + * a Python child process and handling real-time communication. + * Similar pattern to whisperService.js + */ + +const { spawn } = require('child_process'); +const path = require('path'); +const fs = require('fs'); + +class ChatbotService { + constructor() { + this.chatbotProcess = null; + this.isRunning = false; + this.onResponseCallback = null; + this.onErrorCallback = null; + this.onStatusCallback = null; + this.messageQueue = []; + this.isProcessingMessage = false; + } + + /** + * Start the chatbot service + */ + async start() { + if (this.isRunning) { + throw new Error('Chatbot service is already running'); + } + + try { + // Check if Python is available + const pythonPath = await this.findPython(); + if (!pythonPath) { + throw new Error('Python not found. 
Please ensure Python is installed and in PATH'); + } + + // Check if required files exist + this.checkRequiredFiles(); + + // Start the chatbot process + const scriptPath = path.join(__dirname, '..', '..', 'whisper', 'anythingLLM', 'chatbot_client.py'); + const workingDir = path.join(__dirname, '..', '..', 'whisper', 'anythingLLM'); + + console.log('Starting AnythingLLM chatbot process...'); + console.log('Script path:', scriptPath); + console.log('Working directory:', workingDir); + + this.chatbotProcess = spawn(pythonPath, [scriptPath], { + cwd: workingDir, + stdio: ['pipe', 'pipe', 'pipe'] + }); + + this.isRunning = true; + this.setupProcessHandlers(); + + // Give the process a moment to start + await new Promise(resolve => setTimeout(resolve, 1000)); + + console.log('Chatbot service started successfully'); + + if (this.onStatusCallback) { + this.onStatusCallback('started'); + } + + return true; + + } catch (error) { + console.error('Failed to start chatbot service:', error); + this.isRunning = false; + + if (this.onErrorCallback) { + this.onErrorCallback(error); + } + + throw error; + } + } + + /** + * Stop the chatbot service + */ + async stop() { + if (!this.isRunning || !this.chatbotProcess) { + return; + } + + console.log('Stopping chatbot service...'); + + try { + // Send quit command + if (this.chatbotProcess.stdin && !this.chatbotProcess.stdin.destroyed) { + this.chatbotProcess.stdin.write('quit\n'); + this.chatbotProcess.stdin.end(); + } + + // Wait for graceful shutdown + await new Promise((resolve) => { + const timeout = setTimeout(() => { + if (this.chatbotProcess) { + this.chatbotProcess.kill('SIGTERM'); + } + resolve(); + }, 3000); + + this.chatbotProcess.on('close', () => { + clearTimeout(timeout); + resolve(); + }); + }); + + } catch (error) { + console.error('Error during chatbot service shutdown:', error); + } finally { + this.isRunning = false; + this.chatbotProcess = null; + this.messageQueue = []; + this.isProcessingMessage = false; + + if (this.onStatusCallback) { + this.onStatusCallback('stopped'); + } + + console.log('Chatbot service stopped'); + } + } + + /** + * Set the live transcript file path + */ + setLiveTranscriptFile(filePath) { + this.liveTranscriptFile = filePath; + console.log('Live transcript file set:', filePath); + } + + /** + * Send a message to the chatbot with intelligent context switching + */ + async sendMessage(message, stream = false, useRAG = false) { + if (!this.isRunning || !this.chatbotProcess) { + throw new Error('Chatbot service is not running'); + } + + // Determine which mode to use + const command = this._determineCommand(message, stream, useRAG); + const data = JSON.stringify({ + command, + message, + transcript_file: this.liveTranscriptFile + }); + + return new Promise((resolve, reject) => { + this.messageQueue.push({ data, resolve, reject, stream }); + this.processMessageQueue(); + }); + } + + /** + * Determine which command to use based on message type and preferences + */ + _determineCommand(message, stream, useRAG) { + if (stream) { + return 'stream'; + } + + // Use RAG for complex analytical questions + if (useRAG || this._isComplexQuery(message)) { + return 'chat_rag'; + } + + // Use live context for recent/immediate questions + return 'chat'; + } + + /** + * Determine if a query requires RAG capabilities + */ + _isComplexQuery(message) { + const complexKeywords = [ + 'summarize', 'summary', 'overview', 'analyze', 'analysis', + 'compare', 'contrast', 'trend', 'pattern', 'insight', + 'decision', 'conclusion', 
'recommendation', 'action item',
+      'meeting notes', 'key points', 'takeaway'
+    ];
+
+    const lowerMessage = message.toLowerCase();
+    return complexKeywords.some(keyword => lowerMessage.includes(keyword));
+  }
+
+  /**
+   * Process the message queue
+   */
+  processMessageQueue() {
+    if (this.isProcessingMessage || this.messageQueue.length === 0) {
+      return;
+    }
+
+    this.isProcessingMessage = true;
+    const { data, resolve, reject } = this.messageQueue.shift();
+
+    // Keep a local reference so cleanup works even if the process field is reset
+    const stdout = this.chatbotProcess.stdout;
+    let responseBuffer = '';
+    const streamBuffer = [];
+
+    // Settle exactly once: detach the listener, stop the timer, move to the next message
+    const settle = (complete) => {
+      clearTimeout(timeout);
+      stdout.removeListener('data', handleResponse);
+      this.isProcessingMessage = false;
+      complete();
+      this.processMessageQueue();
+    };
+
+    const timeout = setTimeout(() => {
+      settle(() => reject(new Error('Message timeout')));
+    }, 30000);
+
+    const handleResponse = (chunk) => {
+      // Responses can arrive split across chunks, so buffer until full lines are available
+      responseBuffer += chunk.toString();
+      const lines = responseBuffer.split('\n');
+      responseBuffer = lines.pop(); // keep any trailing partial line for the next chunk
+
+      for (const line of lines) {
+        if (!line.trim()) {
+          continue;
+        }
+
+        let parsed;
+        try {
+          parsed = JSON.parse(line);
+        } catch (error) {
+          // Ignore non-JSON output (e.g. stray prints from the Python side)
+          continue;
+        }
+
+        if (parsed.type === 'response') {
+          settle(() => resolve(parsed.data));
+          return;
+        }
+
+        if (parsed.type === 'stream_chunk') {
+          streamBuffer.push(parsed.data);
+          if (this.onResponseCallback) {
+            this.onResponseCallback(parsed.data, true);
+          }
+        }
+
+        if (parsed.type === 'stream_end') {
+          settle(() => resolve(streamBuffer.join('')));
+          return;
+        }
+
+        if (parsed.type === 'error') {
+          settle(() => reject(new Error(parsed.data)));
+          return;
+        }
+      }
+    };
+
+    // Use on() rather than once() so multi-chunk and streaming responses are fully consumed
+    stdout.on('data', handleResponse);
+    this.chatbotProcess.stdin.write(data + '\n');
+  }
+
+  /**
+   * Set up process event handlers
+   */
+  setupProcessHandlers() {
+    // Drain stdout between messages; pending responses are consumed by the
+    // per-message listener attached in processMessageQueue()
+    this.chatbotProcess.stdout.on('data', () => {});
+
+    this.chatbotProcess.stderr.on('data', (data) => {
+      console.error('Chatbot stderr:', data.toString());
+      if (this.onErrorCallback) {
+        this.onErrorCallback(new Error(data.toString()));
+      }
+    });
+
+    this.chatbotProcess.on('close', (code) => {
+      console.log('Chatbot process closed with code:', code);
+      this.isRunning = false;
+      this.chatbotProcess = null;
+
+      if (this.onStatusCallback) {
+        this.onStatusCallback('closed');
+      }
+    });
+
+    this.chatbotProcess.on('error', (error) => {
+      console.error('Chatbot process error:', error);
+      this.isRunning = false;
+
+      if (this.onErrorCallback) {
+        this.onErrorCallback(error);
+      }
+    });
+  }
+
+  /**
+   * Check if required files exist
+   */
+  checkRequiredFiles() {
+    const scriptPath = path.join(__dirname, '..', '..', 'whisper', 'anythingLLM', 'chatbot_client.py');
+    const configPath = path.join(__dirname, '..', '..', 'whisper', 'anythingLLM', 'config.yaml');
+
+    if (!fs.existsSync(scriptPath)) {
+      throw new Error(`Chatbot script not found: ${scriptPath}`);
+    }
+
+    if (!fs.existsSync(configPath)) {
+      throw new Error(`Chatbot config not found: ${configPath}. Please create and configure it.`);
+    }
+  }
+
+  /**
+   * Find a Python executable on PATH
+   */
+  async findPython() {
+    const pythonCommands = ['python', 'python3', 'py'];
+
+    for (const cmd of pythonCommands) {
+      try {
+        const result = await new Promise((resolve) => {
+          const proc = spawn(cmd, ['--version'], { stdio: 'pipe' });
+          proc.on('close', (code) => {
+            resolve(code === 0 ? 
cmd : null); + }); + proc.on('error', () => resolve(null)); + }); + + if (result) { + return result; + } + } catch (error) { + continue; + } + } + + return null; + } + + /** + * Set callback for responses + */ + setOnResponseCallback(callback) { + this.onResponseCallback = callback; + } + + /** + * Set callback for errors + */ + setOnErrorCallback(callback) { + this.onErrorCallback = callback; + } + + /** + * Set callback for status changes + */ + setOnStatusCallback(callback) { + this.onStatusCallback = callback; + } + + /** + * Get service status + */ + getStatus() { + return { + isRunning: this.isRunning, + hasProcess: !!this.chatbotProcess, + queueLength: this.messageQueue.length, + isProcessing: this.isProcessingMessage + }; + } +} + +module.exports = ChatbotService; \ No newline at end of file diff --git a/src/services/whisperService.js b/src/services/whisperService.js new file mode 100644 index 0000000..f44f9c9 --- /dev/null +++ b/src/services/whisperService.js @@ -0,0 +1,343 @@ +/** + * Whisper Transcription Service + * + * This service manages the Whisper AI transcription process by spawning + * a Python child process and handling real-time communication. + */ + +const { spawn } = require('child_process'); +const path = require('path'); +const fs = require('fs'); + +class WhisperService { + constructor() { + this.transcriptionProcess = null; + this.isRunning = false; + this.transcriptBuffer = []; + this.onTranscriptCallback = null; + this.onErrorCallback = null; + this.onStatusCallback = null; + } + + /** + * Start the Whisper transcription service + */ + async start() { + if (this.isRunning) { + throw new Error('Whisper service is already running'); + } + + try { + // Check if Python is available + const pythonPath = await this.findPython(); + if (!pythonPath) { + throw new Error('Python not found. 
Please ensure Python is installed and in PATH'); + } + + // Check if required files exist + this.checkRequiredFiles(); + + // Start the transcription process using new meeting transcriber + const scriptPath = path.join(__dirname, '..', '..', 'whisper', 'meeting_transcriber.py'); + const workingDir = path.join(__dirname, '..', '..', 'whisper'); + + console.log('Starting Whisper transcription process...'); + console.log('Script path:', scriptPath); + console.log('Working directory:', workingDir); + + this.transcriptionProcess = spawn(pythonPath, [scriptPath], { + cwd: workingDir, + stdio: ['pipe', 'pipe', 'pipe'] + }); + + this.isRunning = true; + this.setupProcessHandlers(); + + // Give the process a moment to start + await new Promise(resolve => setTimeout(resolve, 1000)); + + console.log('Whisper service started successfully'); + return true; + + } catch (error) { + console.error('Failed to start Whisper service:', error); + this.isRunning = false; + throw error; + } + } + + /** + * Stop the Whisper transcription service + */ + async stop() { + if (!this.isRunning || !this.transcriptionProcess) { + return; + } + + try { + console.log('Stopping Whisper service...'); + + // Send SIGINT to gracefully stop the process + this.transcriptionProcess.kill('SIGINT'); + + // Wait for process to exit + await new Promise((resolve) => { + this.transcriptionProcess.on('exit', () => { + resolve(); + }); + + // Force kill after 5 seconds if not exited + setTimeout(() => { + if (this.transcriptionProcess && !this.transcriptionProcess.killed) { + this.transcriptionProcess.kill('SIGKILL'); + resolve(); + } + }, 5000); + }); + + this.transcriptionProcess = null; + this.isRunning = false; + console.log('Whisper service stopped'); + + } catch (error) { + console.error('Error stopping Whisper service:', error); + this.isRunning = false; + } + } + + /** + * Get the full transcript as formatted text + */ + getFullTranscript() { + let transcript = ''; + for (const entry of this.transcriptBuffer) { + if (entry.type === 'transcript') { + transcript += `[${entry.timestamp}] ${entry.text}\n`; + } + } + return transcript; + } + + /** + * Save transcript to a file + */ + async saveTranscript(filename = null) { + const transcript = this.getFullTranscript(); + if (!transcript.trim()) { + throw new Error('No transcript to save'); + } + + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const defaultFilename = `meetingnotes_${timestamp}.txt`; + const filepath = filename || path.join(require('os').homedir(), defaultFilename); + + await fs.promises.writeFile(filepath, transcript, 'utf8'); + console.log(`Transcript saved to: ${filepath}`); + return filepath; + } + + /** + * Clear the transcript buffer + */ + clearTranscript() { + this.transcriptBuffer = []; + } + + /** + * Set callback for new transcriptions + */ + onTranscript(callback) { + this.onTranscriptCallback = callback; + } + + /** + * Set callback for errors + */ + onError(callback) { + this.onErrorCallback = callback; + } + + /** + * Set callback for status updates + */ + onStatus(callback) { + this.onStatusCallback = callback; + } + + /** + * Find Python executable + */ + async findPython() { + // First try virtual environment + const venvPython = path.join(__dirname, '..', '..', 'whisper_env', 'Scripts', 'python.exe'); + if (fs.existsSync(venvPython)) { + console.log(`Found Python in virtual environment: ${venvPython}`); + return venvPython; + } + + // Fallback to system Python + const candidates = ['python', 'python3', 'py']; + + for (const 
candidate of candidates) {
+      try {
+        const result = await new Promise((resolve) => {
+          const proc = spawn(candidate, ['--version'], { stdio: 'pipe' });
+          let output = '';
+
+          proc.stdout.on('data', (data) => {
+            output += data.toString();
+          });
+
+          proc.stderr.on('data', (data) => {
+            output += data.toString();
+          });
+
+          proc.on('close', (code) => {
+            resolve({ code, output });
+          });
+
+          proc.on('error', () => {
+            resolve({ code: -1, output: '' });
+          });
+        });
+
+        if (result.code === 0 && result.output.includes('Python')) {
+          console.log(`Found Python: ${candidate} (${result.output.trim()})`);
+          return candidate;
+        }
+      } catch (error) {
+        // Continue to the next candidate
+        continue;
+      }
+    }
+
+    return null;
+  }
+
+  /**
+   * Check if all required files exist
+   */
+  checkRequiredFiles() {
+    const requiredFiles = [
+      'whisper/meeting_transcriber.py',
+      'whisper/standalone_model.py',
+      'whisper/standalone_whisper.py',
+      'whisper/config.yaml',
+      'whisper/mel_filters.npz',
+      'whisper/models/WhisperEncoder.onnx',
+      'whisper/models/WhisperDecoder.onnx'
+    ];
+
+    const workingDir = path.join(__dirname, '..', '..');
+
+    for (const file of requiredFiles) {
+      const filePath = path.join(workingDir, file);
+      if (!fs.existsSync(filePath)) {
+        throw new Error(`Required file not found: ${file}`);
+      }
+    }
+
+    console.log('All required Whisper files found');
+  }
+
+  /**
+   * Setup handlers for the transcription process
+   */
+  setupProcessHandlers() {
+    if (!this.transcriptionProcess) return;
+
+    // Handle stdout (JSON transcription data)
+    this.transcriptionProcess.stdout.on('data', (data) => {
+      const lines = data.toString().split('\n').filter(line => line.trim());
+
+      for (const line of lines) {
+        try {
+          const parsed = JSON.parse(line);
+          this.handleTranscriptionData(parsed);
+        } catch (error) {
+          // Non-JSON output, treat as plain text
+          console.log('Whisper output:', line);
+        }
+      }
+    });
+
+    // Handle stderr
+    this.transcriptionProcess.stderr.on('data', (data) => {
+      console.error('Whisper error:', data.toString());
+      if (this.onErrorCallback) {
+        this.onErrorCallback(data.toString());
+      }
+    });
+
+    // Handle process exit
+    this.transcriptionProcess.on('close', (code) => {
+      console.log(`Whisper process exited with code ${code}`);
+      this.isRunning = false;
+      this.transcriptionProcess = null;
+    });
+
+    // Handle process errors
+    this.transcriptionProcess.on('error', (error) => {
+      console.error('Whisper process error:', error);
+      this.isRunning = false;
+      if (this.onErrorCallback) {
+        this.onErrorCallback(error.message);
+      }
+    });
+  }
+
+  /**
+   * Handle transcription data from the Python process
+   */
+  handleTranscriptionData(data) {
+    // Add to buffer
+    this.transcriptBuffer.push(data);
+
+    // Limit buffer size to prevent memory issues
+    if (this.transcriptBuffer.length > 1000) {
+      this.transcriptBuffer = this.transcriptBuffer.slice(-800);
+    }
+
+    // Call the appropriate callback based on data type
+    switch (data.type) {
+      case 'transcript':
+        console.log(`[${data.timestamp}] Transcript: ${data.text}`);
+        if (this.onTranscriptCallback) {
+          // data already carries the live transcript file path as transcriptFile
+          this.onTranscriptCallback(data);
+        }
+        break;
+
+      case 'status':
+        console.log(`[${data.timestamp}] Status: ${data.message}`);
+        if (this.onStatusCallback) {
+          this.onStatusCallback(data);
+        }
+        break;
+
+      case 'error':
+        console.error(`[${data.timestamp}] Error: ${data.error}`);
+        if 
(this.onErrorCallback) { + this.onErrorCallback(data.error); + } + break; + + default: + console.log('Unknown data type:', data); + } + } + + /** + * Check if the service is running + */ + isServiceRunning() { + return this.isRunning && this.transcriptionProcess && !this.transcriptionProcess.killed; + } +} + +module.exports = WhisperService; \ No newline at end of file diff --git a/whisper/.claude/settings.local.json b/whisper/.claude/settings.local.json new file mode 100644 index 0000000..92a1cce --- /dev/null +++ b/whisper/.claude/settings.local.json @@ -0,0 +1,9 @@ +{ + "permissions": { + "allow": [ + "Read(//c/Users/qc_de/TestSCRUM/scrumAI/src/electron/**)" + ], + "deny": [], + "ask": [] + } +} \ No newline at end of file diff --git a/whisper/LiveTranscriber_standalone.py b/whisper/LiveTranscriber_standalone.py new file mode 100644 index 0000000..83a815b --- /dev/null +++ b/whisper/LiveTranscriber_standalone.py @@ -0,0 +1,251 @@ +import numpy as np +import os +import queue +import sounddevice as sd +import sys +import threading +import yaml +import traceback + +from concurrent.futures import ThreadPoolExecutor + +# Add src directory to path for imports +current_dir = os.path.dirname(os.path.abspath(__file__)) +if current_dir not in sys.path: + sys.path.insert(0, current_dir) + +# Handle imports for both direct Python execution and PyInstaller +try: + from standalone_model import StandaloneWhisperModel +except ImportError: + from .standalone_model import StandaloneWhisperModel + + +def flush_output(): + """Force flush stdout and stderr for better console output in executables""" + sys.stdout.flush() + sys.stderr.flush() + + +def process_transcription( + whisper_model: StandaloneWhisperModel, + chunk: np.ndarray, + silence_threshold: float, + sample_rate: int +) -> None: + """ + Process a chunk of audio data and transcribe it using the Whisper model. + This function is run in a separate thread to allow for concurrent processing. + """ + + try: + if np.abs(chunk).mean() > silence_threshold: + transcript = whisper_model.transcribe(chunk, sample_rate) + if transcript.strip(): + print(f"Transcript: {transcript}") + flush_output() + except Exception as e: + print(f"[ERROR] Error in transcription: {e}") + traceback.print_exc() + flush_output() + + +def process_audio( + whisper_model: StandaloneWhisperModel, + audio_queue: queue.Queue, + stop_event: threading.Event, + max_workers: int, + queue_timeout: float, + chunk_samples: int, + silence_threshold: float, + sample_rate: int +) -> None: + """ + Process audio data from the queue and transcribe it using the Whisper model. 
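+    Incoming chunks are appended to a rolling buffer; once the buffer holds at
+    least chunk_samples samples, fixed-size chunks are sliced off and submitted
+    to a thread pool so transcription never blocks audio capture.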
+ """ + + buffer = np.empty((0,), dtype=np.float32) + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = [] + + while not stop_event.is_set(): + try: + audio_chunk = audio_queue.get(timeout=queue_timeout) + audio_chunk = audio_chunk.flatten() + buffer = np.concatenate([buffer, audio_chunk]) + + while len(buffer) >= chunk_samples: + current_chunk = buffer[:chunk_samples] + buffer = buffer[chunk_samples:] + + future = executor.submit( + process_transcription, + whisper_model, + current_chunk, + silence_threshold, + sample_rate + ) + futures = [f for f in futures if not f.done()] + [future] + + except queue.Empty: + continue + except Exception as e: + print(f"[ERROR] Error in audio processing: {e}") + traceback.print_exc() + flush_output() + + # Wait for transcription futures to complete + for future in futures: + try: + future.result() + except Exception as e: + print(f"[ERROR] Error in future result: {e}") + flush_output() + + +def record_audio( + audio_queue: queue.Queue, + stop_event: threading.Event, + sample_rate: int, + channels: int +) -> None: + """ + Record audio from the microphone and put it into the audio queue. + """ + + def audio_callback(indata, frames, time, status): + """Callback function for audio input stream.""" + if not stop_event.is_set(): + audio_queue.put(indata.copy()) + + try: + with sd.InputStream( + samplerate=sample_rate, + channels=channels, + callback=audio_callback + ): + print("[AUDIO] Microphone stream initialized... (Press Ctrl+C to stop)") + print("=" * 50) + flush_output() + stop_event.wait() + except Exception as e: + print(f"[ERROR] Error in audio recording: {e}") + traceback.print_exc() + flush_output() + + +class StandaloneLiveTranscriber: + def __init__(self): + print("[INIT] Starting Standalone Whisper Transcription") + flush_output() + + try: + with open("config.yaml", "r") as f: + config = yaml.safe_load(f) + + print("[CONFIG] Configuration loaded successfully") + flush_output() + + # audio settings + self.sample_rate = config.get("sample_rate", 16000) + self.chunk_duration = config.get("chunk_duration", 4) + self.channels = config.get("channels", 1) + + # processing settings + self.max_workers = config.get("max_workers", 4) + self.silence_threshold = config.get("silence_threshold", 0.001) + self.queue_timeout = config.get("queue_timeout", 1.0) + self.chunk_samples = int(self.sample_rate * self.chunk_duration) + + # model paths + self.encoder_path = config.get("encoder_path", "models/WhisperEncoder.onnx") + self.decoder_path = config.get("decoder_path", "models/WhisperDecoder.onnx") + + # check that the model paths exist + if not os.path.exists(self.encoder_path): + print(f"[ERROR] Encoder model not found at {self.encoder_path}") + flush_output() + sys.exit(f"Encoder model not found at {self.encoder_path}.") + + if not os.path.exists(self.decoder_path): + print(f"[ERROR] Decoder model not found at {self.decoder_path}") + flush_output() + sys.exit(f"Decoder model not found at {self.decoder_path}.") + + print("[FILES] Model files found") + flush_output() + + # initialize the model + print("🤖 Loading Standalone Whisper model...") + flush_output() + + self.model = StandaloneWhisperModel(self.encoder_path, self.decoder_path) + + print("[MODEL] Model loaded successfully!") + flush_output() + + # initialize the audio queue and stop event + self.audio_queue = queue.Queue() + self.stop_event = threading.Event() + + except Exception as e: + print(f"[ERROR] Error during initialization: {e}") + traceback.print_exc() + flush_output() + 
sys.exit(1) + + def run(self): + """Run the live transcription.""" + + try: + # launch the audio processing and recording threads + process_thread = threading.Thread( + target=process_audio, + args=( + self.model, + self.audio_queue, + self.stop_event, + self.max_workers, + self.queue_timeout, + self.chunk_samples, + self.silence_threshold, + self.sample_rate + ) + ) + process_thread.start() + + record_thread = threading.Thread( + target=record_audio, + args=( + self.audio_queue, + self.stop_event, + self.sample_rate, + self.channels + ) + ) + record_thread.start() + + # wait for threads to finish + try: + while True: + record_thread.join(timeout=0.1) + if not record_thread.is_alive(): + break + except KeyboardInterrupt: + print("\nStopping transcription...") + flush_output() + finally: + self.stop_event.set() + record_thread.join() + process_thread.join() + + except Exception as e: + print(f"[ERROR] Error during execution: {e}") + traceback.print_exc() + flush_output() + + +if __name__ == "__main__": + transcriber = StandaloneLiveTranscriber() + transcriber.run() diff --git a/whisper/__pycache__/standalone_model.cpython-313.pyc b/whisper/__pycache__/standalone_model.cpython-313.pyc new file mode 100644 index 0000000..73105f5 Binary files /dev/null and b/whisper/__pycache__/standalone_model.cpython-313.pyc differ diff --git a/whisper/__pycache__/standalone_whisper.cpython-313.pyc b/whisper/__pycache__/standalone_whisper.cpython-313.pyc new file mode 100644 index 0000000..0c7a1a3 Binary files /dev/null and b/whisper/__pycache__/standalone_whisper.cpython-313.pyc differ diff --git a/whisper/anythingLLM/auth_utils.py b/whisper/anythingLLM/auth_utils.py new file mode 100644 index 0000000..bade62c --- /dev/null +++ b/whisper/anythingLLM/auth_utils.py @@ -0,0 +1,64 @@ +import requests +import yaml +from pathlib import Path + +def auth(api_key: str, base_url: str) -> dict: + """ + Confirms the auth token is valid + + Returns: + dict: {"success": bool, "message": str, "data": dict} + """ + auth_url = base_url + "/auth" + + headers = { + "accept": "application/json", + "Authorization": "Bearer " + api_key + } + + try: + auth_response = requests.get(auth_url, headers=headers) + + if auth_response.status_code == 200: + return { + "success": True, + "message": "Authentication successful", + "data": auth_response.json() + } + else: + return { + "success": False, + "message": f"Authentication failed with status {auth_response.status_code}", + "data": auth_response.json() if auth_response.text else {} + } + except Exception as e: + return { + "success": False, + "message": f"Authentication error: {str(e)}", + "data": {} + } + +def test_auth(config_path=None): + """Test authentication with config file""" + if config_path is None: + config_path = Path(__file__).parent / "config.yaml" + + try: + with open(config_path, "r") as file: + config = yaml.safe_load(file) + + api_key = config["api_key"] + base_url = config["model_server_base_url"] + + return auth(api_key, base_url) + + except Exception as e: + return { + "success": False, + "message": f"Config error: {str(e)}", + "data": {} + } + +if __name__ == "__main__": + result = test_auth() + print(result) \ No newline at end of file diff --git a/whisper/anythingLLM/chatbot_client.py b/whisper/anythingLLM/chatbot_client.py new file mode 100644 index 0000000..bf13387 --- /dev/null +++ b/whisper/anythingLLM/chatbot_client.py @@ -0,0 +1,260 @@ +import requests +import yaml +import json +import sys +import asyncio +import httpx +from pathlib import Path + +class 
ChatbotClient: + def __init__(self, config_path=None): + if config_path is None: + config_path = Path(__file__).parent / "config.yaml" + + with open(config_path, "r") as file: + config = yaml.safe_load(file) + + self.api_key = config["api_key"] + self.base_url = config["model_server_base_url"] + self.stream = config["stream"] + self.stream_timeout = config["stream_timeout"] + self.workspace_slug = config["workspace_slug"] + + if self.stream: + self.chat_url = f"{self.base_url}/v1/workspace/{self.workspace_slug}/stream-chat" + else: + self.chat_url = f"{self.base_url}/v1/workspace/{self.workspace_slug}/chat" + + self.headers = { + "accept": "application/json", + "Content-Type": "application/json", + "Authorization": "Bearer " + self.api_key + } + + # Live transcript file path for immediate context + self.live_transcript_file = None + self.max_context_chars = 8000 # Limit context to prevent token overflow + + def set_live_transcript_file(self, file_path: str): + """Set the live transcript file path for immediate context""" + self.live_transcript_file = file_path + + def _get_live_context(self) -> str: + """Read live transcript file for immediate context""" + if not self.live_transcript_file: + return "" + + try: + with open(self.live_transcript_file, 'r', encoding='utf-8') as f: + content = f.read() + + # Limit context size to prevent token overflow + if len(content) > self.max_context_chars: + lines = content.split('\n') + # Keep the header and recent lines + header_lines = [] + content_lines = [] + in_header = True + + for line in lines: + if in_header and (line.startswith('=') or not line.strip()): + header_lines.append(line) + if line.startswith('='): + in_header = False + else: + content_lines.append(line) + + # Take recent transcript lines that fit within limit + header_text = '\n'.join(header_lines) + remaining_chars = self.max_context_chars - len(header_text) + + recent_lines = [] + char_count = 0 + for line in reversed(content_lines): + if char_count + len(line) + 1 <= remaining_chars: + recent_lines.insert(0, line) + char_count += len(line) + 1 + else: + break + + content = header_text + '\n' + '\n'.join(recent_lines) + + return content + except Exception as e: + print(f"Error reading live transcript: {e}") + return "" + + def chat(self, message: str) -> str: + """ + Send a chat request in non-streaming mode. + Uses live transcript for immediate context. + """ + # Get live context + live_context = self._get_live_context() + + # Enhance message with context if available + if live_context.strip(): + enhanced_message = f"""Meeting Context (Live Transcript): +{live_context} + +User Question: {message}""" + else: + enhanced_message = message + + data = { + "message": enhanced_message, + "mode": "query" + } + try: + chat_response = requests.post( + self.chat_url, + headers=self.headers, + json=data + ) + response_text = chat_response.text.strip() + + # Handle streaming response format + if response_text.startswith('data: '): + response_text = response_text[6:].strip() + + response_data = json.loads(response_text) + return response_data.get('textResponse', str(response_data)) + except ValueError: + return f"Response is not valid JSON. Raw response: {chat_response.text if 'chat_response' in locals() else 'No response'}" + except Exception as e: + return f"Chat request failed. Error: {e}" + + def chat_with_rag(self, message: str) -> str: + """ + Send a chat request using only AnythingLLM's RAG capabilities. + This uses embedded documents for semantic search. 
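+        Unlike chat(), the message is sent as-is; no live-transcript context is prepended.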
+ """ + data = { + "message": message, + "mode": "query" + } + try: + chat_response = requests.post( + self.chat_url, + headers=self.headers, + json=data + ) + response_text = chat_response.text.strip() + + # Handle streaming response format + if response_text.startswith('data: '): + response_text = response_text[6:].strip() + + response_data = json.loads(response_text) + return response_data.get('textResponse', str(response_data)) + except ValueError: + return f"Response is not valid JSON. Raw response: {chat_response.text if 'chat_response' in locals() else 'No response'}" + except Exception as e: + return f"Chat request failed. Error: {e}" + + def streaming_chat(self, message: str): + """ + Generator for streaming chat responses + """ + data = { + "message": message, + "mode": "query" + } + + response_text = "" + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + async def async_stream(): + buffer = "" + try: + async with httpx.AsyncClient(timeout=self.stream_timeout) as client: + async with client.stream("POST", self.chat_url, headers=self.headers, json=data) as response: + async for chunk in response.aiter_text(): + if chunk: + buffer += chunk + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + if line.startswith("data: "): + line = line[len("data: "):] + try: + parsed_chunk = json.loads(line.strip()) + yield parsed_chunk.get("textResponse", "") + except json.JSONDecodeError: + continue + except Exception as e: + yield f"Error processing chunk: {e}" + except httpx.RequestError as e: + yield f"Streaming chat request failed. Error: {e}" + + agen = async_stream() + try: + while True: + chunk = loop.run_until_complete(agen.__anext__()) + response_text += chunk + yield response_text + except StopAsyncIteration: + pass + finally: + loop.close() + yield response_text + +def main(): + """CLI interface for testing""" + client = ChatbotClient() + + try: + while True: + line = input().strip() + if not line: + continue + + if line.lower() == 'quit': + break + + # Parse JSON input + try: + data = json.loads(line) + command = data.get('command') + message = data.get('message') + transcript_file = data.get('transcript_file') + + # Set live transcript file if provided + if transcript_file: + client.set_live_transcript_file(transcript_file) + + if command == 'chat': + response = client.chat(message) + print(json.dumps({"type": "response", "data": response})) + sys.stdout.flush() + + elif command == 'chat_rag': + response = client.chat_with_rag(message) + print(json.dumps({"type": "response", "data": response})) + sys.stdout.flush() + + elif command == 'stream': + print(json.dumps({"type": "stream_start"})) + sys.stdout.flush() + + for chunk in client.streaming_chat(message): + print(json.dumps({"type": "stream_chunk", "data": chunk})) + sys.stdout.flush() + + print(json.dumps({"type": "stream_end"})) + sys.stdout.flush() + + except json.JSONDecodeError: + print(json.dumps({"type": "error", "data": "Invalid JSON input"})) + sys.stdout.flush() + except Exception as e: + print(json.dumps({"type": "error", "data": str(e)})) + sys.stdout.flush() + + except KeyboardInterrupt: + pass + except EOFError: + pass + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/whisper/anythingLLM/config.yaml b/whisper/anythingLLM/config.yaml new file mode 100644 index 0000000..1adcc3a --- /dev/null +++ b/whisper/anythingLLM/config.yaml @@ -0,0 +1,8 @@ +# AnythingLLM Configuration +# Copy this from your AnythingLLM build and update with your settings + +api_key: 
"EVKQEJK-81B40VM-KXZTMPV-GSJQFWT" +model_server_base_url: "http://localhost:3001/api" +workspace_slug: "testing123" +stream: true +stream_timeout: 30 \ No newline at end of file diff --git a/whisper/anythingLLM/workspace_utils.py b/whisper/anythingLLM/workspace_utils.py new file mode 100644 index 0000000..0cc4c17 --- /dev/null +++ b/whisper/anythingLLM/workspace_utils.py @@ -0,0 +1,65 @@ +import requests +import yaml +from pathlib import Path + +def workspaces(api_key: str, base_url: str) -> dict: + """ + Get available workspaces info + + Returns: + dict: {"success": bool, "message": str, "data": list} + """ + workspaces_url = base_url + "/workspaces" + + headers = { + "accept": "application/json", + "Content-Type": "application/json", + "Authorization": "Bearer " + api_key + } + + try: + workspaces_response = requests.get(workspaces_url, headers=headers) + + if workspaces_response.status_code == 200: + return { + "success": True, + "message": "Workspaces retrieved successfully", + "data": workspaces_response.json() + } + else: + return { + "success": False, + "message": f"Failed to get workspaces with status {workspaces_response.status_code}", + "data": workspaces_response.json() if workspaces_response.text else {} + } + except Exception as e: + return { + "success": False, + "message": f"Workspaces error: {str(e)}", + "data": {} + } + +def get_workspaces(config_path=None): + """Get workspaces with config file""" + if config_path is None: + config_path = Path(__file__).parent / "config.yaml" + + try: + with open(config_path, "r") as file: + config = yaml.safe_load(file) + + api_key = config["api_key"] + base_url = config["model_server_base_url"] + + return workspaces(api_key, base_url) + + except Exception as e: + return { + "success": False, + "message": f"Config error: {str(e)}", + "data": {} + } + +if __name__ == "__main__": + result = get_workspaces() + print(result) \ No newline at end of file diff --git a/whisper/config.yaml b/whisper/config.yaml new file mode 100644 index 0000000..61e5f8d --- /dev/null +++ b/whisper/config.yaml @@ -0,0 +1,13 @@ +# audio settings +sample_rate: 16000 # Audio sample rate in Hz +chunk_duration: 4 # Duration of each audio chunk in seconds +channels: 1 # Number of audio channels (1 for mono) + +# processing settings +max_workers: 4 # Number of parallel transcription workers +silence_threshold: 0.001 # Threshold for silence detection +queue_timeout: 1.0 # Timeout for audio queue operations + +# model paths +encoder_path: "models/WhisperEncoder.onnx" +decoder_path: "models/WhisperDecoder.onnx" \ No newline at end of file diff --git a/whisper/meeting_transcriber.py b/whisper/meeting_transcriber.py new file mode 100644 index 0000000..6cdfaed --- /dev/null +++ b/whisper/meeting_transcriber.py @@ -0,0 +1,451 @@ +import numpy as np +import os +import queue +import sys +import threading +import time +import yaml +import requests +import json +from datetime import datetime +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path +from dataclasses import dataclass +from typing import List, Optional, Callable + +# Add chatbot path for AnythingLLM client +chatbot_path = Path("C:/Users/qc_de/local/simple-npu-chatbot/src") +if str(chatbot_path) not in sys.path: + sys.path.insert(0, str(chatbot_path)) + +# Import existing components +from standalone_model import StandaloneWhisperModel + + +@dataclass +class TranscriptSegment: + """A segment of transcribed text with timestamp""" + text: str + timestamp: datetime + confidence: float = 0.0 + + +class 
AnythingLLMClient: + """Client for AnythingLLM API with document upload capabilities""" + + def __init__(self, config_path: str = None): + config_path = config_path or str(chatbot_path / "config.yaml") + + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + self.api_key = config["api_key"] + self.base_url = config["model_server_base_url"] + self.workspace_slug = config["workspace_slug"] + + self.headers = { + "accept": "application/json", + "Authorization": f"Bearer {self.api_key}" + } + + self.chat_headers = { + **self.headers, + "Content-Type": "application/json" + } + + print(f"Connected to AnythingLLM workspace: {self.workspace_slug}") + + def upload_transcript_document(self, content: str, filename: str) -> bool: + """Upload transcript content as a document to AnythingLLM""" + try: + # Create temporary file for upload + temp_file_path = f"temp_{filename}.txt" + with open(temp_file_path, 'w', encoding='utf-8') as f: + f.write(content) + + # Upload file to AnythingLLM + upload_url = f"{self.base_url}/document/upload" + + with open(temp_file_path, 'rb') as f: + files = {'file': (filename + '.txt', f, 'text/plain')} + response = requests.post(upload_url, headers=self.headers, files=files) + + # Clean up temp file + os.remove(temp_file_path) + + if response.status_code == 200: + result = response.json() + print(f"[UPLOAD] Document uploaded: {filename}") + + # Add document to workspace + self.add_document_to_workspace(result.get('location')) + return True + else: + print(f"[ERROR] Upload failed: {response.status_code}") + return False + + except Exception as e: + print(f"[ERROR] Document upload error: {e}") + return False + + def add_document_to_workspace(self, document_location: str) -> bool: + """Add an uploaded document to the workspace""" + try: + add_url = f"{self.base_url}/workspace/{self.workspace_slug}/update-embeddings" + + data = { + "adds": [document_location] + } + + response = requests.post(add_url, headers=self.chat_headers, json=data) + + if response.status_code == 200: + print(f"[SUCCESS] Document added to workspace") + return True + else: + print(f"[ERROR] Failed to add to workspace: {response.status_code}") + return False + + except Exception as e: + print(f"[ERROR] Add to workspace error: {e}") + return False + + def chat(self, message: str, session_id: str = None) -> str: + """Send a chat message to AnythingLLM""" + try: + chat_url = f"{self.base_url}/workspace/{self.workspace_slug}/chat" + + data = { + "message": message, + "mode": "chat", + "sessionId": session_id or f"meeting-{int(time.time())}", + "attachments": [] + } + + response = requests.post(chat_url, headers=self.chat_headers, json=data) + + if response.status_code == 200: + return response.json().get('textResponse', 'No response received') + else: + return f"Error: HTTP {response.status_code}" + + except Exception as e: + return f"Chat error: {e}" + + +class MeetingTranscriber: + """Main class for live meeting transcription with AnythingLLM integration""" + + def __init__(self, whisper_config_path: str = "config.yaml", llm_config_path: str = None): + self.session_id = f"meeting_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + + # Load whisper configuration + self._load_whisper_config(whisper_config_path) + + # Initialize components + self.whisper_model = StandaloneWhisperModel(self.encoder_path, self.decoder_path) + self.llm_client = AnythingLLMClient(llm_config_path) + + # Audio processing + self.audio_queue = queue.Queue() + self.stop_event = threading.Event() + + # Transcript management + 
self.transcript_segments: List[TranscriptSegment] = [] + self.transcript_buffer = [] + self.buffer_size = 10 # Segments to buffer before uploading + + # Live transcript file for real-time access + self.live_transcript_file = f"../meetingnotes_{datetime.now().strftime('%Y-%m-%dT%H-%M-%S-%f')[:-3]}Z.txt" + self.transcript_lock = threading.Lock() + + # Initialize live transcript file + with open(self.live_transcript_file, 'w', encoding='utf-8') as f: + f.write(f"Meeting Session: {self.session_id}\n") + f.write(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") + f.write("=" * 60 + "\n\n") + + # Callbacks + self.on_transcript_callback: Optional[Callable[[str], None]] = None + + print(f"[INIT] Meeting Transcriber initialized (Session: {self.session_id})") + print(f"[TRANSCRIPT] Live transcript: {self.live_transcript_file}") + + def _load_whisper_config(self, config_path: str): + """Load whisper configuration""" + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + self.sample_rate = config.get("sample_rate", 16000) + self.chunk_duration = config.get("chunk_duration", 4) + self.channels = config.get("channels", 1) + self.max_workers = config.get("max_workers", 4) + self.silence_threshold = config.get("silence_threshold", 0.001) + self.queue_timeout = config.get("queue_timeout", 1.0) + self.chunk_samples = int(self.sample_rate * self.chunk_duration) + + self.encoder_path = config.get("encoder_path", "models/WhisperEncoder.onnx") + self.decoder_path = config.get("decoder_path", "models/WhisperDecoder.onnx") + + # Verify model files + for path in [self.encoder_path, self.decoder_path]: + if not os.path.exists(path): + raise FileNotFoundError(f"Model file not found: {path}") + + def set_transcript_callback(self, callback: Callable[[str], None]): + """Set callback for real-time transcript updates""" + self.on_transcript_callback = callback + + def start_meeting(self): + """Start the meeting transcription""" + print("[START] Starting meeting transcription...") + + # Start threads + record_thread = threading.Thread(target=self._record_audio, daemon=True) + process_thread = threading.Thread(target=self._process_audio, daemon=True) + + record_thread.start() + process_thread.start() + + return record_thread, process_thread + + def stop_meeting(self): + """Stop transcription and finalize meeting""" + print("[STOP] Stopping meeting...") + self.stop_event.set() + + # Upload any remaining transcript buffer + if self.transcript_buffer: + self._upload_transcript_batch() + + # Upload full meeting transcript + self._upload_final_transcript() + + def ask_question(self, question: str) -> str: + """Ask a question about the meeting using AnythingLLM's RAG""" + return self.llm_client.chat(question, self.session_id) + + def generate_meeting_notes(self) -> str: + """Generate meeting notes using AnythingLLM""" + prompt = """ + Based on our meeting conversation, please generate comprehensive meeting notes including: + 1. Key discussion points and topics covered + 2. Important decisions made + 3. Action items and next steps + 4. Main takeaways and conclusions + + Format the response as structured meeting notes. 
+ """ + return self.llm_client.chat(prompt, self.session_id) + + def _record_audio(self): + """Record audio from microphone""" + import sounddevice as sd + + def audio_callback(indata, frames, time, status): + if not self.stop_event.is_set(): + self.audio_queue.put(indata.copy()) + + try: + with sd.InputStream( + samplerate=self.sample_rate, + channels=self.channels, + callback=audio_callback + ): + print("[AUDIO] Microphone active - recording started") + self.stop_event.wait() + except Exception as e: + print(f"[ERROR] Audio recording error: {e}") + + def _process_audio(self): + """Process audio chunks and generate transcripts""" + buffer = np.empty((0,), dtype=np.float32) + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + while not self.stop_event.is_set(): + try: + audio_chunk = self.audio_queue.get(timeout=self.queue_timeout) + audio_chunk = audio_chunk.flatten() + buffer = np.concatenate([buffer, audio_chunk]) + + while len(buffer) >= self.chunk_samples: + current_chunk = buffer[:self.chunk_samples] + buffer = buffer[self.chunk_samples:] + + executor.submit(self._transcribe_chunk, current_chunk) + + except queue.Empty: + continue + except Exception as e: + print(f"[ERROR] Audio processing error: {e}") + + def _transcribe_chunk(self, audio_chunk: np.ndarray): + """Transcribe a single audio chunk""" + try: + if np.abs(audio_chunk).mean() > self.silence_threshold: + transcript_text = self.whisper_model.transcribe(audio_chunk, self.sample_rate) + + if transcript_text.strip(): + segment = TranscriptSegment( + text=transcript_text.strip(), + timestamp=datetime.now() + ) + + # Add to segments and buffer + self.transcript_segments.append(segment) + self.transcript_buffer.append(segment) + + # Write to live transcript file immediately + self._write_to_live_transcript(segment) + + # Output for Node.js integration + timestamp_str = segment.timestamp.strftime('%H:%M:%S') + transcript_data = { + "timestamp": timestamp_str, + "text": segment.text, + "type": "transcript", + "transcriptFile": os.path.abspath(self.live_transcript_file) + } + + # Output JSON for Node.js + print(json.dumps(transcript_data)) + sys.stdout.flush() + + # Call callback + if self.on_transcript_callback: + self.on_transcript_callback(f"[{timestamp_str}]: {segment.text}") + + # Upload batch if buffer is full + if len(self.transcript_buffer) >= self.buffer_size: + self._upload_transcript_batch() + + except Exception as e: + error_data = { + "type": "error", + "error": str(e), + "timestamp": datetime.now().strftime('%H:%M:%S') + } + print(json.dumps(error_data)) + sys.stdout.flush() + + def _write_to_live_transcript(self, segment: TranscriptSegment): + """Write segment to live transcript file immediately""" + try: + with self.transcript_lock: + with open(self.live_transcript_file, 'a', encoding='utf-8') as f: + timestamp_str = segment.timestamp.strftime('%H:%M:%S') + f.write(f"[{timestamp_str}]: {segment.text}\n") + f.flush() # Ensure immediate write to disk + except Exception as e: + print(f"[ERROR] Live transcript write error: {e}") + + def get_live_transcript_path(self): + """Get the path to the live transcript file""" + return self.live_transcript_file + + def _upload_transcript_batch(self): + """Upload current transcript buffer to AnythingLLM""" + if not self.transcript_buffer: + return + + try: + # Create batch content + batch_content = f"Meeting Transcript Batch - {datetime.now().strftime('%H:%M:%S')}\n" + batch_content += "=" * 60 + "\n\n" + + for segment in self.transcript_buffer: + timestamp_str = 
segment.timestamp.strftime('%H:%M:%S') + batch_content += f"[{timestamp_str}]: {segment.text}\n" + + # Upload to AnythingLLM + batch_filename = f"{self.session_id}_batch_{len(self.transcript_segments)}" + success = self.llm_client.upload_transcript_document(batch_content, batch_filename) + + if success: + print(f"[UPLOAD] Uploaded batch with {len(self.transcript_buffer)} segments") + + # Clear buffer + self.transcript_buffer = [] + + except Exception as e: + print(f"[ERROR] Batch upload error: {e}") + + def _upload_final_transcript(self): + """Upload complete meeting transcript""" + if not self.transcript_segments: + return + + try: + # Create full transcript + full_content = f"Complete Meeting Transcript - {self.session_id}\n" + full_content += f"Meeting Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n" + full_content += f"Duration: {len(self.transcript_segments)} segments\n" + full_content += "=" * 80 + "\n\n" + + for segment in self.transcript_segments: + timestamp_str = segment.timestamp.strftime('%H:%M:%S') + full_content += f"[{timestamp_str}]: {segment.text}\n" + + # Upload final transcript + filename = f"{self.session_id}_complete_transcript" + success = self.llm_client.upload_transcript_document(full_content, filename) + + if success: + print(f"[UPLOAD] Final transcript uploaded with {len(self.transcript_segments)} segments") + + except Exception as e: + print(f"[ERROR] Final transcript upload error: {e}") + + +def main(): + """Main function to run the meeting transcriber""" + transcriber = MeetingTranscriber() + + try: + # Start meeting + threads = transcriber.start_meeting() + + print("\n" + "="*70) + print("[ACTIVE] ANYTHINGLLM MEETING TRANSCRIBER ACTIVE") + print(" • Live transcription streaming to your workspace") + print(" • RAG-powered context available for questions") + print(" • Type 'ask: ' to query meeting content") + print(" • Type 'notes' to generate meeting notes") + print(" • Press Ctrl+C to stop and finalize") + print("="*70 + "\n") + + # Interactive loop + while True: + try: + user_input = input().strip() + + if user_input.lower().startswith('ask:'): + question = user_input[4:].strip() + if question: + print(f"\n[QUESTION] Question: {question}") + answer = transcriber.ask_question(question) + print(f"[ANSWER] Answer: {answer}\n") + + elif user_input.lower() == 'notes': + print("\n[NOTES] Generating meeting notes...") + notes = transcriber.generate_meeting_notes() + print("[NOTES] Meeting Notes:") + print("-" * 50) + print(notes) + print("-" * 50 + "\n") + + except KeyboardInterrupt: + break + + except KeyboardInterrupt: + pass + finally: + # Stop and finalize + transcriber.stop_meeting() + print(f"\n[COMPLETE] Meeting session {transcriber.session_id} completed") + print("[COMPLETE] All transcripts have been uploaded to your AnythingLLM workspace") + print("[COMPLETE] You can now ask questions about this meeting anytime!") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/whisper/mel_filters.npz b/whisper/mel_filters.npz new file mode 100644 index 0000000..c75d33a Binary files /dev/null and b/whisper/mel_filters.npz differ diff --git a/whisper/requirements.txt b/whisper/requirements.txt new file mode 100644 index 0000000..c018d07 --- /dev/null +++ b/whisper/requirements.txt @@ -0,0 +1,102 @@ +aiohappyeyeballs==2.6.1 +aiohttp==3.11.14 +aiosignal==1.3.2 +asttokens==3.0.0 +attrs==25.3.0 +audio2numpy==0.1.2 +backcall==0.2.0 +backoff==2.2.1 +beautifulsoup4==4.13.3 +botocore==1.37.23 +certifi==2025.1.31 +cffi==1.17.1 
+charset-normalizer==3.4.1 +colorama==0.4.6 +coloredlogs==15.0.1 +contextlib2==21.6.0 +datasets==2.14.5 +decorator==5.2.1 +deprecation==2.1.0 +dill==0.3.7 +executing==2.2.0 +ffmpeg==1.4 +filelock==3.18.0 +flatbuffers==25.2.10 +frozenlist==1.5.0 +fsspec==2023.6.0 +gdown==4.7.1 +gitdb==4.0.12 +GitPython==3.1.42 +h5py==3.13.0 +huggingface-hub==0.23.5 +humanfriendly==10.0 +idna==3.10 +ipython==8.12.3 +jedi==0.19.2 +Jinja2==3.1.6 +jmespath==1.0.1 +llvmlite==0.44.0 +MarkupSafe==3.0.2 +matplotlib-inline==0.1.7 +more-itertools==10.6.0 +mpmath==1.3.0 +multidict==6.2.0 +multiprocess==0.70.15 +networkx==3.4.2 +numba==0.61.0 +numpy==1.26.4 +onnx==1.16.2 +onnxruntime==1.21.0 +onnxruntime-qnn==1.21.0 +openai-whisper==20231117 +opencv-python==4.11.0.86 +packaging==24.2 +pandas==2.2.3 +parso==0.8.4 +pickleshare==0.7.5 +pillow==11.1.0 +prettytable==3.11.0 +prompt_toolkit==3.0.50 +propcache==0.3.1 +protobuf==3.20.3 +pure_eval==0.2.3 +pyarrow==19.0.1 +pycparser==2.22 +Pygments==2.19.1 +pyreadline3==3.5.4 +PySocks==1.7.1 +python-dateutil==2.9.0.post0 +pytz==2025.2 +PyYAML==6.0.2 +qai-hub==0.26.0 +qai-hub-models==0.25.5 +regex==2024.11.6 +requests==2.32.3 +requests-toolbelt==1.0.0 +ruamel.yaml==0.18.10 +ruamel.yaml.clib==0.2.12 +s3transfer==0.10.4 +samplerate==0.2.1 +schema==0.7.5 +scipy==1.15.2 +semver==3.0.4 +six==1.17.0 +smmap==5.0.2 +sounddevice==0.5.1 +soupsieve==2.6 +stack-data==0.6.3 +sympy==1.13.3 +tabulate==0.9.0 +tiktoken==0.9.0 +torch==2.4.1 +torchvision==0.19.1 +tqdm==4.67.1 +traitlets==5.14.3 +typing_extensions==4.13.0 +tzdata==2025.2 +urllib3==2.3.0 +wcwidth==0.2.13 +xxhash==3.5.0 +yarl==1.18.3 +# AnythingLLM dependencies +httpx==0.28.1 \ No newline at end of file diff --git a/whisper/requirements_minimal.txt b/whisper/requirements_minimal.txt new file mode 100644 index 0000000..3e6f431 --- /dev/null +++ b/whisper/requirements_minimal.txt @@ -0,0 +1,4 @@ +numpy==1.26.4 +sounddevice==0.5.1 +onnxruntime==1.21.0 +PyYAML==6.0.2 \ No newline at end of file diff --git a/whisper/standalone_model.py b/whisper/standalone_model.py new file mode 100644 index 0000000..31c9b55 --- /dev/null +++ b/whisper/standalone_model.py @@ -0,0 +1,142 @@ +# --------------------------------------------------------------------- +# Standalone ONNX model wrapper without AI Hub dependencies +# --------------------------------------------------------------------- +import numpy as np +import onnxruntime +import sys +import os + +# Add src directory to path for imports +current_dir = os.path.dirname(os.path.abspath(__file__)) +if current_dir not in sys.path: + sys.path.insert(0, current_dir) + +# Handle imports for both direct Python execution and PyInstaller +try: + from standalone_whisper import StandaloneWhisperApp, TorchNumpyAdapter +except ImportError: + from .standalone_whisper import StandaloneWhisperApp, TorchNumpyAdapter + + +def get_onnx_session_with_fallback(path): + """ + Create ONNX Runtime session with QNN provider fallback to CPU. + More robust for PyInstaller executables. 
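+    The QNN execution provider (Snapdragon NPU) is tried first; if that session
+    cannot be created, it silently falls back to CPUExecutionProvider.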
+ """ + options = onnxruntime.SessionOptions() + + # First, try QNN provider (for Snapdragon X Elite optimization) + try: + session = onnxruntime.InferenceSession( + path, + sess_options=options, + providers=["QNNExecutionProvider"], + provider_options=[ + { + "backend_path": "QnnHtp.dll", + "htp_performance_mode": "burst", + "high_power_saver": "sustained_high_performance", + "enable_htp_fp16_precision": "1", + "htp_graph_finalization_optimization_mode": "3", + } + ], + ) + return session + except Exception as e: + # Fall back to CPU provider silently + pass + + # Fall back to CPU provider + try: + session = onnxruntime.InferenceSession( + path, + sess_options=options, + providers=["CPUExecutionProvider"], + ) + return session + except Exception as e: + print(f"[ERROR] Failed to load {os.path.basename(path)}: {str(e)}") + sys.stdout.flush() + raise e + + +class StandaloneONNXEncoder: + """Standalone ONNX encoder wrapper""" + def __init__(self, encoder_path): + self.session = get_onnx_session_with_fallback(encoder_path) + + def __call__(self, audio): + try: + # Uncomment the line below for debugging + # print(f"🔍 Encoder input shape: {audio.shape}, dtype: {audio.dtype}") + # sys.stdout.flush() + return self.session.run(None, {"audio": audio}) + except Exception as e: + print(f"[ERROR] Error in encoder inference: {e}") + print(f" Input type: {type(audio)}") + print(f" Input shape: {getattr(audio, 'shape', 'No shape attr')}") + print(f" Input dtype: {getattr(audio, 'dtype', 'No dtype attr')}") + sys.stdout.flush() + raise + + +class StandaloneONNXDecoder: + """Standalone ONNX decoder wrapper""" + def __init__(self, decoder_path): + self.session = get_onnx_session_with_fallback(decoder_path) + + def __call__(self, x, index, k_cache_cross, v_cache_cross, k_cache_self, v_cache_self): + try: + # Convert torch tensors to numpy if needed + if hasattr(index, 'numpy'): + index_np = index.numpy() + else: + index_np = np.array(index) + + return self.session.run( + None, + { + "x": x.astype(np.int32), + "index": index_np.astype(np.int32), + "k_cache_cross": k_cache_cross, + "v_cache_cross": v_cache_cross, + "k_cache_self": k_cache_self, + "v_cache_self": v_cache_self, + }, + ) + except Exception as e: + print(f"[ERROR] Error in decoder inference: {e}") + sys.stdout.flush() + raise + + +class StandaloneWhisperModel: + """ + Standalone Whisper model that works without AI Hub dependencies. + Complete replacement for WhisperBaseEnONNX and WhisperApp. 
+    """
+
+    def __init__(self, encoder_path, decoder_path):
+        # Create ONNX model wrappers (used directly; no TorchNumpyAdapter needed)
+        self.encoder = StandaloneONNXEncoder(encoder_path)
+        self.decoder = StandaloneONNXDecoder(decoder_path)
+
+        # Model parameters for Whisper Base EN
+        self.num_decoder_blocks = 6
+        self.num_decoder_heads = 8
+        self.attention_dim = 512
+
+        # Build the app once so the mel filter bank is not reloaded on
+        # every transcription call
+        self.app = StandaloneWhisperApp(
+            encoder=self.encoder,
+            decoder=self.decoder,
+            num_decoder_blocks=self.num_decoder_blocks,
+            num_decoder_heads=self.num_decoder_heads,
+            attention_dim=self.attention_dim,
+        )
+
+    def transcribe(self, audio: np.ndarray, sample_rate: int) -> str:
+        """Transcribe audio to text"""
+        try:
+            return self.app.transcribe(audio, sample_rate)
+        except Exception as e:
+            print(f"[ERROR] Error during transcription: {e}")
+            sys.stdout.flush()
+            return ""
diff --git a/whisper/standalone_whisper.py b/whisper/standalone_whisper.py
new file mode 100644
index 0000000..8ac0b04
--- /dev/null
+++ b/whisper/standalone_whisper.py
@@ -0,0 +1,344 @@
+# ---------------------------------------------------------------------
+# Standalone Whisper implementation without AI Hub dependencies
+# Based on the original Qualcomm AI Hub models, simplified for PyInstaller
+# ---------------------------------------------------------------------
+import numpy as np
+import os
+
+try:
+    import samplerate
+    HAS_SAMPLERATE = True
+except ImportError:
+    HAS_SAMPLERATE = False
+    print("Warning: samplerate not available, using scipy for resampling")
+import torch
+import whisper
+from scipy import special as scipy_special
+from scipy.signal import resample
+
+
+# Whisper constants
+SAMPLE_RATE = 16000
+CHUNK_LENGTH = 30
+N_FFT = 400
+HOP_LENGTH = 160
+N_MELS = 80
+N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE  # 480000
+AUDIO_EMB_LEN = int(N_SAMPLES / N_MELS / 4)  # 1500
+MELS_AUDIO_LEN = AUDIO_EMB_LEN * 2  # 3000
+MEAN_DECODE_LEN = 224
+
+# Token constants
+TOKEN_SOT = 50257  # Start of transcript
+TOKEN_EOT = 50256  # End of transcript
+TOKEN_BLANK = 220  # " "
+TOKEN_NO_TIMESTAMP = 50362
+TOKEN_TIMESTAMP_BEGIN = 50363
+TOKEN_NO_SPEECH = 50361
+NO_SPEECH_THR = 0.6
+
+# Non-speech tokens to suppress
+NON_SPEECH_TOKENS = [
+    1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 357, 366, 438, 532, 685, 705, 796, 930, 1058, 1220, 1267, 1279, 1303, 1343, 1377, 1391, 1635, 1782, 1875, 2162, 2361, 2488, 3467, 4008, 4211, 4600, 4808, 5299, 5855, 6329, 7203, 9609, 9959, 10563, 10786, 11420, 11709, 11907, 13163, 13697, 13700, 14808, 15306, 16410, 16791, 17992, 19203, 19510, 20724, 22305, 22935, 27007, 30109, 30420, 33409, 34949, 40283, 40493, 40549, 47282, 49146, 50257, 50357, 50358, 50359, 50360, 50361,
+]
+
+
+class TorchNumpyAdapter:
+    """Simple adapter to convert between torch tensors and numpy arrays"""
+
+    def __init__(self, model):
+        self.model = model
+
+    def __call__(self, *args, **kwargs):
+        # Convert numpy inputs to torch tensors
+        torch_args = []
+        for arg in args:
+            if isinstance(arg, np.ndarray):
+                torch_args.append(torch.from_numpy(arg))
+            else:
+                torch_args.append(arg)
+
+        torch_kwargs = {}
+        for key, value in kwargs.items():
+            if isinstance(value, np.ndarray):
+                torch_kwargs[key] = torch.from_numpy(value)
+            else:
+                torch_kwargs[key] = value
+
+        # Run the model
+        with torch.no_grad():
+            result = self.model(*torch_args, **torch_kwargs)
+
+        # Convert torch outputs back to numpy
+        if isinstance(result, torch.Tensor):
+            return result.detach().numpy()
+        elif isinstance(result, (list, tuple)):
+            return [
+                r.detach().numpy() if isinstance(r, torch.Tensor) else r
+                for r in result
+            ]
+        else:
+            return result
+
+
+class StandaloneWhisperApp:
+    """
+    Standalone WhisperApp that works without AI Hub dependencies.
+    Compatible with PyInstaller executables.
+    """
+
+    def __init__(
+        self,
+        encoder,
+        decoder,
+        num_decoder_blocks: int,
+        num_decoder_heads: int,
+        attention_dim: int,
+        mel_filter: np.ndarray | None = None,
+        sample_rate: int = SAMPLE_RATE,
+        max_audio_seconds: int = CHUNK_LENGTH,
+        n_fft: int = N_FFT,
+        hop_length: int = HOP_LENGTH,
+    ):
+        self.encoder = encoder
+        self.decoder = decoder
+        self.num_decoder_blocks = num_decoder_blocks
+        self.num_decoder_heads = num_decoder_heads
+        self.attention_dim = attention_dim
+        self.mean_decode_len = MEAN_DECODE_LEN
+
+        # Set audio parameters first (needed for mel filter creation)
+        self.hop_length = hop_length
+        self.sample_rate = sample_rate
+        self.max_audio_seconds = max_audio_seconds
+        self.n_fft = n_fft
+        self.max_audio_samples = self.max_audio_seconds * self.sample_rate
+
+        # Create a default mel filter bank if one was not provided
+        self.mel_filter = mel_filter
+        self.mel_filter_loaded = False  # Track whether a warning was already shown
+        if self.mel_filter is None:
+            self.mel_filter = self._load_or_create_mel_filter()
+
+    def _load_or_create_mel_filter(self):
+        """Load the proper mel filters from mel_filters.npz if available,
+        otherwise create a simplified filter bank."""
+
+        # Try to load the proper Whisper mel filters first
+        try:
+            # Resolve the path relative to this script's directory
+            script_dir = os.path.dirname(os.path.abspath(__file__))
+            mel_filter_path = os.path.join(script_dir, "mel_filters.npz")
+            if os.path.exists(mel_filter_path):
+                loaded = np.load(mel_filter_path)
+                if 'mel_filters' in loaded:
+                    mel_filter_matrix = loaded['mel_filters']
+                    loaded.close()
+                    self.mel_filter_loaded = True
+                    return mel_filter_matrix.astype(np.float32)
+                else:
+                    loaded.close()
+                    if not self.mel_filter_loaded:
+                        print("⚠ mel_filters.npz found but doesn't contain 'mel_filters' key")
+            else:
+                if not self.mel_filter_loaded:
+                    print("⚠ mel_filters.npz not found - using simplified mel filters")
+        except Exception as e:
+            if not self.mel_filter_loaded:
+                print(f"⚠ Error loading mel_filters.npz: {e}")
+                print("⚠ Falling back to simplified mel filters")
+
+        # Fall back to simplified mel filter creation
+        self.mel_filter_loaded = True
+        return self._create_simplified_mel_filter()
+
+    def _create_simplified_mel_filter(self):
+        """Create a basic triangular mel filter bank.
+
+        This is a simplified fallback; for best quality, load the proper
+        filters from mel_filters.npz."""
+
+        n_mels = N_MELS
+        fmax = self.sample_rate // 2
+
+        # Space the filter band edges linearly on the mel scale,
+        # then map them back to Hz
+        mel_points = np.linspace(0, 2595 * np.log10(1 + fmax / 700), n_mels + 2)
+        hz_points = 700 * (10 ** (mel_points / 2595) - 1)
+
+        # Convert to FFT bin numbers
+        bin_points = np.floor((self.n_fft + 1) * hz_points / self.sample_rate)
+
+        # Create the filter bank
+        fbank = np.zeros([n_mels, self.n_fft // 2 + 1])
+        for j in range(0, n_mels):
+            # Rising edge of the triangular filter for mel band j
+            for i in range(int(bin_points[j]), int(bin_points[j + 1])):
+                fbank[j, i] = (i - bin_points[j]) / (bin_points[j + 1] - bin_points[j])
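+            # Falling edge of the triangular filter for mel band j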
+            for i in range(int(bin_points[j + 1]), int(bin_points[j + 2])):
+                fbank[j, i] = (bin_points[j + 2] - i) / (bin_points[j + 2] - bin_points[j + 1])
+
+        return fbank.astype(np.float32)
+
+    def transcribe(
+        self, audio: np.ndarray | str, audio_sample_rate: int | None = None
+    ) -> str:
+        """
+        Transcribe the provided audio to text.
+        """
+        if isinstance(audio, str):
+            try:
+                import audio2numpy as a2n
+                audio, audio_sample_rate = a2n.audio_from_file(audio)
+            except ImportError:
+                raise ImportError("audio2numpy required for file input. Install with: pip install audio2numpy")
+
+        assert audio_sample_rate is not None
+        assert isinstance(audio, np.ndarray)
+
+        return " ".join(
+            self._transcribe_single_chunk(x)
+            for x in self._chunk_and_resample_audio(audio, audio_sample_rate)
+        )
+
+    def _transcribe_single_chunk(self, audio: np.ndarray) -> str:
+        """Transcribe a single audio chunk to text."""
+        mel_input = self._log_mel_spectrogram(audio)
+        k_cache_cross, v_cache_cross = self.encoder(mel_input)
+
+        # Start decoding
+        x = np.array([[TOKEN_SOT]])
+        decoded_tokens = [TOKEN_SOT]
+        sample_len = self.mean_decode_len
+
+        k_cache_self = np.zeros((
+            self.num_decoder_blocks,
+            self.num_decoder_heads,
+            self.attention_dim // self.num_decoder_heads,
+            sample_len,
+        )).astype(np.float32)
+
+        v_cache_self = np.zeros((
+            self.num_decoder_blocks,
+            self.num_decoder_heads,
+            sample_len,
+            self.attention_dim // self.num_decoder_heads,
+        )).astype(np.float32)
+
+        for i in range(sample_len):
+            index = torch.zeros([1, 1], dtype=torch.int32)
+            index[0, 0] = i
+
+            decoder_out = self.decoder(
+                x, index, k_cache_cross, v_cache_cross, k_cache_self, v_cache_self
+            )
+
+            logits = decoder_out[0]
+            k_cache_self = decoder_out[1]
+            v_cache_self = decoder_out[2]
+
+            logits = logits[0, -1]  # consider only the last token
+
+            # Apply filters
+            if i == 0:
+                # Compute the no-speech probability before suppression:
+                # TOKEN_NO_SPEECH is itself in NON_SPEECH_TOKENS, so reading
+                # it after suppression would always give probability 0
+                no_speech_prob = np.exp(scipy_special.log_softmax(logits)[TOKEN_NO_SPEECH])
+                logits[[TOKEN_EOT, TOKEN_BLANK]] = -np.inf
+                logits[NON_SPEECH_TOKENS] = -np.inf
+
+            logits, logprobs = self._apply_timestamp_rules(logits, decoded_tokens)
+
+            if i == 0:
+                # detect no_speech
+                if no_speech_prob > NO_SPEECH_THR:
+                    break
+
+            next_token = np.argmax(logits)
+            if next_token == TOKEN_EOT:
+                break
+
+            x = np.array([[next_token]])
+            decoded_tokens.append(int(next_token))
+
+        # Decode tokens to text
+        try:
+            tokenizer = whisper.decoding.get_tokenizer(
+                multilingual=False, language="en", task="transcribe"
+            )
+            text = tokenizer.decode(decoded_tokens[1:])  # remove TOKEN_SOT
+            return text.strip()
+        except Exception as e:
+            print(f"⚠ Warning: Could not decode tokens properly: {e}")
+            # Fallback: try to decode with the GPT-2 byte-pair encoding
+            try:
+                import tiktoken
+                encoding = tiktoken.get_encoding("gpt2")
+                text = encoding.decode(decoded_tokens[1:])  # remove TOKEN_SOT
+                return text.strip()
+            except Exception as e2:
+                print(f"⚠ Warning: Fallback tokenizer also failed: {e2}")
+                # Last resort: return token count info
+                return f"[Decoded {len(decoded_tokens)-1} tokens but cannot convert to text]"
+
+    def _log_mel_spectrogram(self, audio_np: np.ndarray) -> np.ndarray:
+        """Compute the log-mel spectrogram"""
+        audio = torch.from_numpy(audio_np)
+
+        # Zero-pad to the 30 s window; chunking guarantees the input is
+        # never longer than max_audio_samples
+        padding = self.max_audio_samples - len(audio)
+        if padding > 0:
+            audio = torch.nn.functional.pad(audio, (0, padding))
+
+        window = torch.hann_window(self.n_fft)
+        stft = torch.stft(audio, self.n_fft, self.hop_length, window=window, return_complex=True)
+        # Drop the last frame, matching Whisper's reference mel computation
+        magnitudes = stft[..., :-1].abs() ** 2
+
+        mel_spec = torch.from_numpy(self.mel_filter) @ magnitudes
+
+        log_spec = torch.clamp(mel_spec, min=1e-10).log10()
+        log_spec = torch.maximum(log_spec, log_spec.max() - 8.0)
+        log_spec = (log_spec + 4.0) / 4.0
+
+        return log_spec.unsqueeze(0).detach().float().numpy()
+
+    def _chunk_and_resample_audio(
+        self,
+        audio: np.ndarray,
+        audio_sample_rate: int,
+    ) -> list[np.ndarray]:
+        """Resample audio to the model's sample rate and split it into chunks"""
+        if audio_sample_rate != self.sample_rate:
+            if HAS_SAMPLERATE:
+                audio = samplerate.resample(audio, self.sample_rate / audio_sample_rate)
+            else:
+                # Use scipy for resampling
+                target_length = int(len(audio) * self.sample_rate / audio_sample_rate)
+                audio = resample(audio, target_length)
+
+        number_of_full_length_audio_chunks = (
+            audio.shape[0] // self.sample_rate // self.max_audio_seconds
+        )
+        last_sample_in_full_length_audio_chunks = (
+            self.sample_rate * number_of_full_length_audio_chunks * self.max_audio_seconds
+        )
+
+        if number_of_full_length_audio_chunks == 0:
+            return [audio]
+
+        return [
+            *np.array_split(
+                audio[:last_sample_in_full_length_audio_chunks],
+                number_of_full_length_audio_chunks,
+            ),
+            audio[last_sample_in_full_length_audio_chunks:],
+        ]
+
+    def _apply_timestamp_rules(
+        self, logits: np.ndarray, tokens: list[int]
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Apply timestamp rules during decoding"""
+        # Simplified timestamp rules - the full implementation would be more complex
+        logits[TOKEN_NO_TIMESTAMP] = -np.inf
+
+        # For simplicity, suppress timestamps at the beginning
+        if len(tokens) == 1:  # Only the SOT token so far
+            logits[TOKEN_TIMESTAMP_BEGIN:] = -np.inf
+
+        logprobs = scipy_special.log_softmax(logits)
+        return logits, logprobs
diff --git a/whisper/temp_meeting_20250914_085009_batch_24.txt b/whisper/temp_meeting_20250914_085009_batch_24.txt
new file mode 100644
index 0000000..e69de29
diff --git a/whisper/transcriber_for_nodejs.py b/whisper/transcriber_for_nodejs.py
new file mode 100644
index 0000000..6875219
--- /dev/null
+++ b/whisper/transcriber_for_nodejs.py
@@ -0,0 +1,348 @@
+import numpy as np
+import os
+import queue
+import sounddevice as sd
+import sys
+import threading
+import yaml
+import json
+import time
+
+from concurrent.futures import ThreadPoolExecutor
+
+# Add this script's directory to sys.path for imports
+current_dir = os.path.dirname(os.path.abspath(__file__))
+if current_dir not in sys.path:
+    sys.path.insert(0, current_dir)
+
+from standalone_model import StandaloneWhisperModel
+
+
+def flush_output():
+    """Force flush stdout and stderr for better console output in executables"""
+    sys.stdout.flush()
+    sys.stderr.flush()
+
+
+def process_transcription(
+    whisper_model: StandaloneWhisperModel,
+    chunk: np.ndarray,
+    silence_threshold: float,
+    sample_rate: int,
+    full_transcript: list
+) -> None:
+    """
+    Process a chunk of audio data and transcribe it using the Whisper model.
+    This function runs in a separate thread to allow concurrent processing.
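+
+    Each result is written as one JSON object per line on stdout; an
+    illustrative example of the emitted shape:
+
+        {"timestamp": "09:15:02", "text": "hello world", "type": "transcript"}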
+    """
+
+    try:
+        if np.abs(chunk).mean() > silence_threshold:
+            transcript = whisper_model.transcribe(chunk, sample_rate)
+            if transcript.strip():
+                timestamp = time.strftime('%H:%M:%S')
+                transcript_data = {
+                    "timestamp": timestamp,
+                    "text": transcript.strip(),
+                    "type": "transcript"
+                }
+                full_transcript.append(transcript_data)
+                print(json.dumps(transcript_data))
+                flush_output()
+    except Exception as e:
+        error_data = {
+            "timestamp": time.strftime('%H:%M:%S'),
+            "error": str(e),
+            "type": "error"
+        }
+        print(json.dumps(error_data))
+        flush_output()
+
+
+def process_audio(
+    whisper_model: StandaloneWhisperModel,
+    audio_queue: queue.Queue,
+    stop_event: threading.Event,
+    max_workers: int,
+    queue_timeout: float,
+    chunk_samples: int,
+    silence_threshold: float,
+    sample_rate: int,
+    full_transcript: list
+) -> None:
+    """
+    Process audio data from the queue and transcribe it using the Whisper model.
+    """
+
+    buffer = np.empty((0,), dtype=np.float32)
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        futures = []
+
+        while not stop_event.is_set():
+            try:
+                audio_chunk = audio_queue.get(timeout=queue_timeout)
+                audio_chunk = audio_chunk.flatten()
+                buffer = np.concatenate([buffer, audio_chunk])
+
+                while len(buffer) >= chunk_samples:
+                    current_chunk = buffer[:chunk_samples]
+                    buffer = buffer[chunk_samples:]
+
+                    future = executor.submit(
+                        process_transcription,
+                        whisper_model,
+                        current_chunk,
+                        silence_threshold,
+                        sample_rate,
+                        full_transcript
+                    )
+                    # Keep only the futures that are still pending
+                    futures = [f for f in futures if not f.done()] + [future]
+
+            except queue.Empty:
+                continue
+            except Exception as e:
+                error_data = {
+                    "timestamp": time.strftime('%H:%M:%S'),
+                    "error": f"Error in audio processing: {e}",
+                    "type": "error"
+                }
+                print(json.dumps(error_data))
+                flush_output()
+
+        # Wait for the remaining transcription futures to complete
+        for future in futures:
+            try:
+                future.result()
+            except Exception as e:
+                error_data = {
+                    "timestamp": time.strftime('%H:%M:%S'),
+                    "error": f"Error in future result: {e}",
+                    "type": "error"
+                }
+                print(json.dumps(error_data))
+                flush_output()
+
+
+def record_audio(
+    audio_queue: queue.Queue,
+    stop_event: threading.Event,
+    sample_rate: int,
+    channels: int
+) -> None:
+    """
+    Record audio from the microphone and put it into the audio queue.
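+
+    Each block delivered by the sounddevice callback is copied before being
+    queued, since sounddevice reuses the callback buffer between calls.
+    The stream runs until stop_event is set.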
+    """
+
+    def audio_callback(indata, frames, time_info, status):
+        """Callback function for the audio input stream.
+
+        The third parameter is named time_info to avoid shadowing the
+        time module."""
+        if not stop_event.is_set():
+            audio_queue.put(indata.copy())
+
+    try:
+        with sd.InputStream(
+            samplerate=sample_rate,
+            channels=channels,
+            callback=audio_callback
+        ):
+            status_data = {
+                "timestamp": time.strftime('%H:%M:%S'),
+                "message": "Microphone stream initialized",
+                "type": "status"
+            }
+            print(json.dumps(status_data))
+            flush_output()
+            stop_event.wait()
+    except Exception as e:
+        error_data = {
+            "timestamp": time.strftime('%H:%M:%S'),
+            "error": f"Error in audio recording: {e}",
+            "type": "error"
+        }
+        print(json.dumps(error_data))
+        flush_output()
+
+
+class NodeJSWhisperTranscriber:
+    def __init__(self):
+        self.full_transcript = []
+
+        status_data = {
+            "timestamp": time.strftime('%H:%M:%S'),
+            "message": "Starting Whisper Transcription for Node.js",
+            "type": "status"
+        }
+        print(json.dumps(status_data))
+        flush_output()
+
+        try:
+            config_path = os.path.join(current_dir, "config.yaml")
+            with open(config_path, "r") as f:
+                config = yaml.safe_load(f)
+
+            status_data = {
+                "timestamp": time.strftime('%H:%M:%S'),
+                "message": "Configuration loaded successfully",
+                "type": "status"
+            }
+            print(json.dumps(status_data))
+            flush_output()
+
+            # audio settings
+            self.sample_rate = config.get("sample_rate", 16000)
+            self.chunk_duration = config.get("chunk_duration", 4)
+            self.channels = config.get("channels", 1)
+
+            # processing settings
+            self.max_workers = config.get("max_workers", 4)
+            self.silence_threshold = config.get("silence_threshold", 0.001)
+            self.queue_timeout = config.get("queue_timeout", 1.0)
+            self.chunk_samples = int(self.sample_rate * self.chunk_duration)
+
+            # model paths - resolved relative to the script directory
+            encoder_rel_path = config.get("encoder_path", "models/WhisperEncoder.onnx")
+            decoder_rel_path = config.get("decoder_path", "models/WhisperDecoder.onnx")
+
+            # Make paths absolute relative to the script's directory
+            self.encoder_path = os.path.join(current_dir, encoder_rel_path)
+            self.decoder_path = os.path.join(current_dir, decoder_rel_path)
+
+            # check that the model paths exist
+            if not os.path.exists(self.encoder_path):
+                error_data = {
+                    "timestamp": time.strftime('%H:%M:%S'),
+                    "error": f"Encoder model not found at {self.encoder_path}",
+                    "type": "error"
+                }
+                print(json.dumps(error_data))
+                flush_output()
+                sys.exit(1)
+
+            if not os.path.exists(self.decoder_path):
+                error_data = {
+                    "timestamp": time.strftime('%H:%M:%S'),
+                    "error": f"Decoder model not found at {self.decoder_path}",
+                    "type": "error"
+                }
+                print(json.dumps(error_data))
+                flush_output()
+                sys.exit(1)
+
+            status_data = {
+                "timestamp": time.strftime('%H:%M:%S'),
+                "message": "Model files found",
+                "type": "status"
+            }
+            print(json.dumps(status_data))
+            flush_output()
+
+            # initialize the model
+            status_data = {
+                "timestamp": time.strftime('%H:%M:%S'),
+                "message": "Loading Whisper model...",
+                "type": "status"
+            }
+            print(json.dumps(status_data))
+            flush_output()
+
+            self.model = StandaloneWhisperModel(self.encoder_path, self.decoder_path)
+
+            status_data = {
+                "timestamp": time.strftime('%H:%M:%S'),
+                "message": "Model loaded successfully!",
+                "type": "status"
+            }
+            print(json.dumps(status_data))
+            flush_output()
+
+            # initialize the audio queue and stop event
+            self.audio_queue = queue.Queue()
+            self.stop_event = threading.Event()
+
+        except Exception as e:
+            error_data = {
+                "timestamp": time.strftime('%H:%M:%S'),
+                "error": f"Error during initialization: {e}",
"type": "error" + } + print(json.dumps(error_data)) + flush_output() + sys.exit(1) + + def run(self): + """Run the live transcription.""" + + try: + # launch the audio processing and recording threads + process_thread = threading.Thread( + target=process_audio, + args=( + self.model, + self.audio_queue, + self.stop_event, + self.max_workers, + self.queue_timeout, + self.chunk_samples, + self.silence_threshold, + self.sample_rate, + self.full_transcript + ) + ) + process_thread.start() + + record_thread = threading.Thread( + target=record_audio, + args=( + self.audio_queue, + self.stop_event, + self.sample_rate, + self.channels + ) + ) + record_thread.start() + + # wait for threads to finish + try: + while True: + record_thread.join(timeout=0.1) + if not record_thread.is_alive(): + break + except KeyboardInterrupt: + status_data = { + "timestamp": time.strftime('%H:%M:%S'), + "message": "Stopping transcription...", + "type": "status" + } + print(json.dumps(status_data)) + flush_output() + finally: + self.stop_event.set() + record_thread.join() + process_thread.join() + + except Exception as e: + error_data = { + "timestamp": time.strftime('%H:%M:%S'), + "error": f"Error during execution: {e}", + "type": "error" + } + print(json.dumps(error_data)) + flush_output() + + def get_full_transcript(self): + """Return the full transcript as formatted text.""" + transcript_text = "" + for entry in self.full_transcript: + if entry["type"] == "transcript": + transcript_text += f"[{entry['timestamp']}] {entry['text']}\n" + return transcript_text + + def stop_transcription(self): + """Stop the transcription process.""" + self.stop_event.set() + + +if __name__ == "__main__": + transcriber = NodeJSWhisperTranscriber() + transcriber.run() \ No newline at end of file