diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..f584e74 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,14 @@ +{ + "permissions": { + "allow": [ + "Bash(npm install)", + "Bash(npm install:*)", + "Bash(if exist node_modules rmdir /s /q node_modules)", + "Bash(if exist package-lock.json del package-lock.json)", + "Bash(powershell:*)", + "Read(//c/Users/qc_de/simple-whisper-transcription/**)" + ], + "deny": [], + "ask": [] + } +} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 28a60e1..81ef93a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ node_modules -.cursor \ No newline at end of file +.cursor +*.onnx +whisper/models +models \ No newline at end of file diff --git a/WHISPER_INTEGRATION.md b/WHISPER_INTEGRATION.md new file mode 100644 index 0000000..6d61e15 --- /dev/null +++ b/WHISPER_INTEGRATION.md @@ -0,0 +1,141 @@ +# Whisper AI Integration for ScrumAI + +This integration adds real-time speech transcription to the ScrumAI application using OpenAI's Whisper model. + +## Features + +- **Real-time Speech Transcription**: Converts speech to text in real-time during meetings +- **Live Display**: Transcripts appear instantly in the "Transcript" tab +- **Automatic Saving**: When a meeting ends, transcripts are automatically saved as `meetingnotes_[timestamp].txt` to your home directory +- **Keyword Extraction**: Automatically extracts keywords from the transcript for the Keywords tab +- **Cross-platform**: Works on Windows, macOS, and Linux + +## Setup Instructions + +### 1. Install Python Dependencies + +Run the setup script to create a Python virtual environment and install dependencies: + +```bash +setup_whisper.bat +``` + +This will: +- Create a Python virtual environment (`whisper_env`) +- Install required packages (numpy, sounddevice, onnxruntime, PyYAML) +- Set up the Whisper models + +### 2. 
Verify Installation + +The application will automatically detect if the required files are present: +- Python executable in `whisper_env` or system PATH +- Whisper model files in `whisper/models/` +- Configuration file at `whisper/config.yaml` + +### 3. Run the Application + +Start the ScrumAI application as usual: + +```bash +npm start +``` + +## How to Use + +1. **Start Meeting**: Click the "Start Meeting" button + - This will initialize the Whisper transcription service + - A microphone permission dialog may appear - grant permission + - You'll see status messages in the console + +2. **Begin Speaking**: Start talking normally + - Real-time transcripts will appear in the "Transcript" tab + - Keywords will be automatically extracted and shown in the "Keywords" tab + - Timestamps are added to each transcript entry + +3. **Stop Meeting**: Click the "Stop Meeting" button + - This stops the transcription + - Automatically saves the full transcript as `meetingnotes_[timestamp].txt` in your home directory + - Shows a confirmation dialog with the saved file location + +## File Structure + +``` +scrumAI/ +├── whisper/ +│ ├── transcriber_for_nodejs.py # Main transcription script +│ ├── standalone_model.py # Whisper model wrapper +│ ├── standalone_whisper.py # Whisper implementation +│ ├── config.yaml # Configuration +│ ├── mel_filters.npz # Mel filter coefficients +│ ├── requirements_minimal.txt # Python dependencies +│ └── models/ +│ ├── WhisperEncoder.onnx # Encoder model +│ └── WhisperDecoder.onnx # Decoder model +├── src/ +│ ├── services/ +│ │ └── whisperService.js # Node.js Whisper service wrapper +│ └── electron/ +│ ├── main.js # Updated with Whisper integration +│ └── preload.js # Updated with IPC methods +├── whisper_env/ # Python virtual environment +└── setup_whisper.bat # Setup script +``` + +## Configuration + +The Whisper service can be configured by editing `whisper/config.yaml`: + +```yaml +# Audio settings +sample_rate: 16000 # Audio sample rate in Hz 
+chunk_duration: 4 # Duration of each audio chunk in seconds +channels: 1 # Number of audio channels (1 for mono) + +# Processing settings +max_workers: 4 # Number of parallel transcription workers +silence_threshold: 0.001 # Threshold for silence detection +queue_timeout: 1.0 # Timeout for audio queue operations + +# Model paths +encoder_path: "whisper/models/WhisperEncoder.onnx" +decoder_path: "whisper/models/WhisperDecoder.onnx" +``` + +## Troubleshooting + +### Common Issues + +1. **"Python not found"** + - Ensure Python 3.8+ is installed + - Run `setup_whisper.bat` to create virtual environment + +2. **"Model files not found"** + - Ensure the Whisper ONNX models are in `whisper/models/` + - Check that `WhisperEncoder.onnx` and `WhisperDecoder.onnx` exist + +3. **"Microphone access denied"** + - Grant microphone permissions to the application + - Check your operating system's privacy settings + +4. **No transcript appearing** + - Check the console for error messages + - Ensure you're speaking loud enough (above silence threshold) + - Verify the microphone is working in other applications + +### Performance Tips + +- For better performance on lower-end hardware, reduce `max_workers` in config.yaml +- Increase `silence_threshold` if picking up too much background noise +- Decrease `chunk_duration` for more responsive transcription (but higher CPU usage) + +## Technical Details + +The integration works by: + +1. **Electron Main Process** spawns a Python child process running the Whisper transcriber +2. **Python Process** captures audio from the microphone and processes it through the Whisper model +3. **IPC Communication** sends transcript data back to the Electron app via JSON over stdout +4. **Renderer Process** receives transcript events and updates the UI in real-time +5. 
**File I/O** saves the complete transcript when the meeting ends + +The system is designed to be resilient and will gracefully handle errors like microphone access issues or model loading problems. \ No newline at end of file diff --git a/meetingnotes_2025-09-14T08-44-59-295Z.txt b/meetingnotes_2025-09-14T08-44-59-295Z.txt new file mode 100644 index 0000000..5898f22 --- /dev/null +++ b/meetingnotes_2025-09-14T08-44-59-295Z.txt @@ -0,0 +1,4 @@ +Meeting Session: meeting_20250914_084458 +Started: 2025-09-14 08:44:59 +============================================================ + diff --git a/meetingnotes_2025-09-14T08-47-47-141Z.txt b/meetingnotes_2025-09-14T08-47-47-141Z.txt new file mode 100644 index 0000000..8581e83 --- /dev/null +++ b/meetingnotes_2025-09-14T08-47-47-141Z.txt @@ -0,0 +1,4 @@ +Meeting Session: meeting_20250914_084746 +Started: 2025-09-14 08:47:47 +============================================================ + diff --git a/meetingnotes_2025-09-14T08-49-08-891Z.txt b/meetingnotes_2025-09-14T08-49-08-891Z.txt new file mode 100644 index 0000000..e070d7c --- /dev/null +++ b/meetingnotes_2025-09-14T08-49-08-891Z.txt @@ -0,0 +1,4 @@ +Meeting Session: meeting_20250914_084908 +Started: 2025-09-14 08:49:08 +============================================================ + diff --git a/meetingnotes_2025-09-14T08-49-42-449Z.txt b/meetingnotes_2025-09-14T08-49-42-449Z.txt new file mode 100644 index 0000000..9bc79e9 --- /dev/null +++ b/meetingnotes_2025-09-14T08-49-42-449Z.txt @@ -0,0 +1,4 @@ +Meeting Session: meeting_20250914_084941 +Started: 2025-09-14 08:49:42 +============================================================ + diff --git a/meetingnotes_2025-09-14T08-50-09-832Z.txt b/meetingnotes_2025-09-14T08-50-09-832Z.txt new file mode 100644 index 0000000..07ec9bd --- /dev/null +++ b/meetingnotes_2025-09-14T08-50-09-832Z.txt @@ -0,0 +1,28 @@ +Meeting Session: meeting_20250914_085009 +Started: 2025-09-14 08:50:09 
+============================================================ + +[08:50:15]: , please. Hello, my name is Sean. +[08:50:19]: , I'm a Coron's two-roomed. +[08:50:26]: , finally why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is it? why is +[08:50:27]: , yeah, can you talk to me more about. +[08:50:31]: , but NYU Car Art, you know, I don't know anything much. +[08:50:35]: , thank you for your time. +[08:50:39]: the other one. +[08:50:43]: , please. +[08:50:47]: , you. +[08:50:51]: , it's not doing this, can you do one thing? +[08:50:55]: , you do this. Your data is getting... +[08:50:59]: , but I think that's what I want to look at. +[08:51:03]: , he's basically ever submitted in like now. He's not that good. +[08:51:07]: , you know we signed by just a minute. +[08:51:11]: the problem. Nobody +[08:51:15]: , and then we will remove the Ds store, all that stuff and then put it in like properly, you'll put it in. +[08:51:19]: , I'll keep the... +[08:51:23]: , and then make it like the first thing called the White Cash, all the random things. +[08:51:27]: , and ex-piles it. +[08:51:31]: , but it will come and emerge this time. +[08:51:35]: , but it's not the same. +[08:51:39]: , you can more job. Keep that way. Yeah. +[08:51:43]: , and we'll see you next time. +[08:51:47]: , okay. 
diff --git a/meetingnotes_2025-09-14T09-04-00-968Z.txt b/meetingnotes_2025-09-14T09-04-00-968Z.txt new file mode 100644 index 0000000..cba0588 --- /dev/null +++ b/meetingnotes_2025-09-14T09-04-00-968Z.txt @@ -0,0 +1,31 @@ +Meeting Session: meeting_20250914_090400 +Started: 2025-09-14 09:04:00 +============================================================ + +[09:04:08]: , this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this +[09:04:10]: , but it doesn't +[09:04:14]: , it is. +[09:04:18]: , thanks a lot. +[09:04:27]: , I'm not a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. +[09:04:27]: , with 26 letters and 10 numbers. +[09:04:30]: , but I also forgot to consider other characters. +[09:04:34]: the animation. +[09:04:38]: , like, at the eight hashtag dollar, or same sign. 
+[09:04:48]: the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character +[09:04:49]: , and I'm a bad guy here for two. +[09:04:50]: , and I'm so happy to be here. +[09:04:54]: , oh my god, this is such a terrible animal. +[09:04:59]: , okay. Do you want me to keep talking? Yes, yes. +[09:05:03]: , so let's keep talking. Let's talk about something go ahead and like which it can understand in like... +[09:05:06]: , yeah. Okay. Yeah. Um, +[09:05:10]: , usually is an AI powered event discovery social media platform. +[09:05:14]: , that addresses the loneliness epidemic. +[09:05:18]: , where 103 people from our generation suffer. +[09:05:22]: , where from chronic loneliness and we believe that the best +[09:05:26]: , and I think that's why I'm here. +[09:05:30]: the same thing. 
+[09:05:34]: , and when you register for the event. +[09:05:38]: , for an event, you're a match of what's so good. We then... +[09:05:42]: , so they can host a data. +[09:05:46]: , so that way we have an equal system. +[09:05:50]: , and businesses with the B2B and the B2B. diff --git a/meetingnotes_2025-09-14T11-34-49-983Z.txt b/meetingnotes_2025-09-14T11-34-49-983Z.txt new file mode 100644 index 0000000..a0b4029 --- /dev/null +++ b/meetingnotes_2025-09-14T11-34-49-983Z.txt @@ -0,0 +1,7 @@ +[04:34:02] , hello, hello, hello, hello. +[04:34:06] , what is happening. +[04:34:14] , hello. +[04:34:18] , where am I is this New York? +[04:34:24] the other side. What are you guys doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing? What are you doing +[04:34:26] , where are you sleeping so much wake up please? +[04:34:34] , how is it good morning? diff --git a/meetingnotes_2025-09-14T12-29-57-555Z.txt b/meetingnotes_2025-09-14T12-29-57-555Z.txt new file mode 100644 index 0000000..f2eded2 --- /dev/null +++ b/meetingnotes_2025-09-14T12-29-57-555Z.txt @@ -0,0 +1,2 @@ +[05:29:41] , hello, it's Al, how are you guys doing? +[05:29:49] , what is up? Is this New York? 
diff --git a/meetingnotes_2025-09-14T12-31-12-563Z.txt b/meetingnotes_2025-09-14T12-31-12-563Z.txt new file mode 100644 index 0000000..c594ac9 --- /dev/null +++ b/meetingnotes_2025-09-14T12-31-12-563Z.txt @@ -0,0 +1,4 @@ +[05:30:58] , hi hi hello this is new y'all +[05:31:02] , hello hello and see you. +[05:31:06] , and keep coming, and then keep coming. +[05:31:10] , keep coming in all that. diff --git a/meetingnotes_2025-09-14T13-26-56-089Z.txt b/meetingnotes_2025-09-14T13-26-56-089Z.txt new file mode 100644 index 0000000..2645850 --- /dev/null +++ b/meetingnotes_2025-09-14T13-26-56-089Z.txt @@ -0,0 +1,7 @@ +[06:26:29] , all it is okay with it. +[06:26:33] , all it is okay with that. +[06:26:37] , with it. Yeah, yeah. Oh. +[06:26:41] , what is this guy? I listen to it. +[06:26:45] , what was this? +[06:26:49] the person was done. +[06:26:53] , we should be very smart, you're not full. diff --git a/meetingnotes_2025-09-14T15-01-14-612Z.txt b/meetingnotes_2025-09-14T15-01-14-612Z.txt new file mode 100644 index 0000000..053f936 --- /dev/null +++ b/meetingnotes_2025-09-14T15-01-14-612Z.txt @@ -0,0 +1,9 @@ +[08:00:29] , hello, nor constant. +[08:00:33] , hello, hi nice. +[08:00:37] , okay thank you. I broke that out. The keywords are the +[08:00:41] , is broken. I would be ignored. +[08:00:45] , I am the best. +[08:00:49] , it's a message. +[08:00:53] , yeah. +[08:00:57] , oh we have grown one second so in the other end. +[08:01:12] , it's generally not only after it's done, it's no pressure to do it, it's a buffer. diff --git a/meetingnotes_2025-09-14T15-04-27-507Z.txt b/meetingnotes_2025-09-14T15-04-27-507Z.txt new file mode 100644 index 0000000..2648617 --- /dev/null +++ b/meetingnotes_2025-09-14T15-04-27-507Z.txt @@ -0,0 +1,6 @@ +[08:04:04] , and I'm going to work on this. +[08:04:08] , what do you think about that? +[08:04:13] , I'll go on his voice, but I'm going to work on this. +[08:04:16] the same thing. +[08:04:21] , and then we will get the best of you. 
And then, yeah, I'll tell you something more. +[08:04:24] the dot net devocable is going to be taking this time you know. diff --git a/meetingnotes_2025-09-14T16-05-55-698Z.txt b/meetingnotes_2025-09-14T16-05-55-698Z.txt new file mode 100644 index 0000000..6ca5dff --- /dev/null +++ b/meetingnotes_2025-09-14T16-05-55-698Z.txt @@ -0,0 +1,27 @@ +[09:04:08] , this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this is a this +[09:04:10] , but it doesn't +[09:04:14] , it is. +[09:04:18] , thanks a lot. +[09:04:27] , I'm not a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. I'm a bad guy. +[09:04:27] , with 26 letters and 10 numbers. +[09:04:30] , but I also forgot to consider other characters. +[09:04:34] the animation. +[09:04:38] , like, at the eight hashtag dollar, or same sign. 
+[09:04:48] the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character of the character +[09:04:49] , and I'm a bad guy here for two. +[09:04:50] , and I'm so happy to be here. +[09:04:54] , oh my god, this is such a terrible animal. +[09:04:59] , okay. Do you want me to keep talking? Yes, yes. +[09:05:03] , so let's keep talking. Let's talk about something go ahead and like which it can understand in like... +[09:05:06] , yeah. Okay. Yeah. Um, +[09:05:10] , usually is an AI powered event discovery social media platform. +[09:05:14] , that addresses the loneliness epidemic. +[09:05:18] , where 103 people from our generation suffer. +[09:05:22] , where from chronic loneliness and we believe that the best +[09:05:26] , and I think that's why I'm here. +[09:05:30] the same thing. 
+[09:05:34] , and when you register for the event. +[09:05:38] , for an event, you're a match of what's so good. We then... +[09:05:42] , so they can host a data. +[09:05:46] , so that way we have an equal system. +[09:05:50] , and businesses with the B2B and the B2B. diff --git a/package-lock.json b/package-lock.json index 1f5a2e6..54bfe77 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,7 +14,6 @@ "node-record-lpcm16": "^1.0.1", "socket.io": "^4.7.2", "socket.io-client": "^4.7.2", - "speaker": "^0.5.4", "wav": "^1.0.2", "ws": "^8.14.2" }, @@ -1975,12 +1974,12 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "24.3.3", - "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.3.tgz", - "integrity": "sha512-GKBNHjoNw3Kra1Qg5UXttsY5kiWMEfoHq2TmXb+b1rcm6N7B3wTrFYIf/oSZ1xNQ+hVVijgLkiDZh7jRRsh+Gw==", + "version": "24.4.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.4.0.tgz", + "integrity": "sha512-gUuVEAK4/u6F9wRLznPUU4WGUacSEBDPoC2TrBkw3GAnOLHBL45QdfHOXp1kJ4ypBGLxTOB+t7NJLpKoC3gznQ==", "license": "MIT", "dependencies": { - "undici-types": "~7.10.0" + "undici-types": "~7.11.0" } }, "node_modules/@types/phoenix": { @@ -2638,15 +2637,6 @@ "baseline-browser-mapping": "dist/cli.js" } }, - "node_modules/bindings": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", - "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", - "license": "MIT", - "dependencies": { - "file-uri-to-path": "1.0.0" - } - }, "node_modules/bl": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", @@ -3372,6 +3362,7 @@ "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, "license": "MIT", "dependencies": { "ms": "^2.1.3" @@ -4538,12 +4529,6 @@ "node": 
"^10.12.0 || >=12.0.0" } }, - "node_modules/file-uri-to-path": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", - "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", - "license": "MIT" - }, "node_modules/filelist": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.4.tgz", @@ -7826,21 +7811,6 @@ "source-map": "^0.6.0" } }, - "node_modules/speaker": { - "version": "0.5.5", - "resolved": "https://registry.npmjs.org/speaker/-/speaker-0.5.5.tgz", - "integrity": "sha512-IBeMZUITigYBO139h0+1MAgBHNZF55GFJN4U/Box35Sg49cfqYkbCO92TXoCUy22Ast08zfqKuXLvPxq9CWwLw==", - "hasInstallScript": true, - "license": "(MIT AND LGPL-2.1-only)", - "dependencies": { - "bindings": "^1.3.0", - "buffer-alloc": "^1.1.0", - "debug": "^4.0.0" - }, - "engines": { - "node": ">=8.6" - } - }, "node_modules/sprintf-js": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", @@ -8299,9 +8269,9 @@ } }, "node_modules/undici-types": { - "version": "7.10.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz", - "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==", + "version": "7.11.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.11.0.tgz", + "integrity": "sha512-kt1ZriHTi7MU+Z/r9DOdAI3ONdaR3M3csEaRc6ewa4f4dTvX4cQCbJ4NkEn0ohE4hHtq85+PhPSTY+pO/1PwgA==", "license": "MIT" }, "node_modules/universalify": { diff --git a/setup_whisper.bat b/setup_whisper.bat new file mode 100644 index 0000000..497d99b --- /dev/null +++ b/setup_whisper.bat @@ -0,0 +1,35 @@ +@echo off +echo Setting up Whisper AI transcription for ScrumAI... 
+ +REM Check if Python is installed +python --version >nul 2>&1 +if %errorlevel% neq 0 ( + echo Python is not installed or not in PATH + echo Please install Python 3.8+ and try again + pause + exit /b 1 +) + +echo Python found! + +REM Create virtual environment for Whisper +echo Creating Python virtual environment... +python -m venv whisper_env + +REM Activate virtual environment +echo Activating virtual environment... +call whisper_env\Scripts\activate.bat + +REM Install minimal requirements +echo Installing Python dependencies... +pip install -r whisper\requirements_minimal.txt + +echo. +echo Whisper setup complete! +echo. +echo To test the integration: +echo 1. Run 'npm start' to start the ScrumAI application +echo 2. Click 'Start Meeting' to begin transcription +echo 3. Speak into your microphone and watch the transcript appear +echo. +pause \ No newline at end of file diff --git a/src/electron/main.js b/src/electron/main.js index e043eda..c1916c5 100644 --- a/src/electron/main.js +++ b/src/electron/main.js @@ -12,10 +12,16 @@ const { app, BrowserWindow, ipcMain } = require('electron'); const path = require('path'); const https = require('https'); +const WhisperService = require('../services/whisperService'); +const ChatbotService = require('../services/chatbotService'); // Keep a global reference of the window object let mainWindow; +// Service instances +let whisperService; +let chatbotService; + // GitHub Integration Configuration const GITHUB_CONFIG = { token: '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', @@ -64,15 +70,189 @@ function createWindow() { * IPC Event Handlers */ +// Initialize Whisper service +function initializeWhisperService() { + whisperService = new WhisperService(); + + // Set up transcript callback to send to renderer + whisperService.onTranscript((data) => { + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('whisper-transcript', data); + } + + // Update chatbot with latest transcript file if available + if 
(chatbotService && data.transcriptFile) { + chatbotService.setLiveTranscriptFile(data.transcriptFile); + } + }); + + // Set up error callback + whisperService.onError((error) => { + console.error('Whisper error:', error); + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('whisper-error', error); + } + }); + + // Set up status callback + whisperService.onStatus((status) => { + console.log('Whisper status:', status.message); + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('whisper-status', status); + } + }); +} + // Handle meeting start/stop events ipcMain.handle('start-meeting', async (event, meetingData) => { console.log('Starting meeting:', meetingData); - return { success: true, meetingId: Date.now() }; + + try { + if (!whisperService) { + initializeWhisperService(); + } + + await whisperService.start(); + console.log('Whisper transcription started'); + + return { success: true, meetingId: Date.now() }; + } catch (error) { + console.error('Failed to start Whisper transcription:', error); + return { success: false, error: error.message }; + } }); ipcMain.handle('stop-meeting', async (event) => { console.log('Stopping meeting'); - return { success: true }; + + try { + if (whisperService && whisperService.isServiceRunning()) { + await whisperService.stop(); + console.log('Whisper transcription stopped'); + } + + return { success: true }; + } catch (error) { + console.error('Failed to stop Whisper transcription:', error); + return { success: false, error: error.message }; + } +}); + +// Chatbot Service IPC Handlers +ipcMain.handle('initialize-chatbot', async (event) => { + console.log('Initializing chatbot service'); + + try { + if (!chatbotService) { + chatbotService = new ChatbotService(); + + // Set up chatbot event handlers + chatbotService.setOnResponseCallback((response, isStreaming) => { + if (mainWindow) { + mainWindow.webContents.send('chatbot-response', { response, isStreaming }); + } + }); + + 
chatbotService.setOnErrorCallback((error) => { + if (mainWindow) { + mainWindow.webContents.send('chatbot-error', error.message); + } + }); + + chatbotService.setOnStatusCallback((status) => { + if (mainWindow) { + mainWindow.webContents.send('chatbot-status', status); + } + }); + } + + return { success: true }; + } catch (error) { + console.error('Failed to initialize chatbot service:', error); + return { success: false, error: error.message }; + } +}); + +ipcMain.handle('start-chatbot', async (event) => { + console.log('Starting chatbot service'); + + try { + if (!chatbotService) { + return { success: false, error: 'Chatbot service not initialized' }; + } + + await chatbotService.start(); + console.log('Chatbot service started'); + + return { success: true }; + } catch (error) { + console.error('Failed to start chatbot service:', error); + return { success: false, error: error.message }; + } +}); + +ipcMain.handle('stop-chatbot', async (event) => { + console.log('Stopping chatbot service'); + + try { + if (chatbotService) { + await chatbotService.stop(); + console.log('Chatbot service stopped'); + } + + return { success: true }; + } catch (error) { + console.error('Failed to stop chatbot service:', error); + return { success: false, error: error.message }; + } +}); + +ipcMain.handle('send-chat-message', async (event, message) => { + console.log('Sending chat message:', message); + + try { + if (!chatbotService) { + return { success: false, error: 'Chatbot service not initialized' }; + } + + const response = await chatbotService.sendMessage(message); + return { success: true, data: response }; + } catch (error) { + console.error('Failed to send chat message:', error); + return { success: false, error: error.message }; + } +}); + +// Handle transcript export +ipcMain.handle('save-transcript', async (event, filename) => { + console.log('Saving transcript:', filename); + + try { + if (!whisperService) { + throw new Error('No transcript available - meeting not 
started'); + } + + const filepath = await whisperService.saveTranscript(filename); + return { success: true, filepath }; + } catch (error) { + console.error('Failed to save transcript:', error); + return { success: false, error: error.message }; + } +}); + +// Handle get full transcript +ipcMain.handle('get-full-transcript', async (event) => { + try { + if (!whisperService) { + return { success: false, error: 'No transcript available - meeting not started' }; + } + + const transcript = whisperService.getFullTranscript(); + return { success: true, transcript }; + } catch (error) { + console.error('Failed to get transcript:', error); + return { success: false, error: error.message }; + } }); // GitHub issue creation handler @@ -170,7 +350,16 @@ app.whenReady().then(() => { }); // Quit when all windows are closed -app.on('window-all-closed', () => { +app.on('window-all-closed', async () => { + // Clean up Whisper service + if (whisperService && whisperService.isServiceRunning()) { + try { + await whisperService.stop(); + } catch (error) { + console.error('Error stopping Whisper service on app quit:', error); + } + } + // On macOS, keep app running even when all windows are closed if (process.platform !== 'darwin') { app.quit(); diff --git a/src/electron/preload.js b/src/electron/preload.js index b0fac15..9935af7 100644 --- a/src/electron/preload.js +++ b/src/electron/preload.js @@ -13,6 +13,29 @@ contextBridge.exposeInMainWorld('electronAPI', { // Meeting management startMeeting: (meetingData) => ipcRenderer.invoke('start-meeting', meetingData), stopMeeting: () => ipcRenderer.invoke('stop-meeting'), + + // Transcript management + saveTranscript: (filename) => ipcRenderer.invoke('save-transcript', filename), + getFullTranscript: () => ipcRenderer.invoke('get-full-transcript'), + + // Event listeners for Whisper events + onWhisperTranscript: (callback) => ipcRenderer.on('whisper-transcript', callback), + onWhisperError: (callback) => ipcRenderer.on('whisper-error', 
callback), + onWhisperStatus: (callback) => ipcRenderer.on('whisper-status', callback), + + // Chatbot management + initializeChatbot: () => ipcRenderer.invoke('initialize-chatbot'), + startChatbot: () => ipcRenderer.invoke('start-chatbot'), + stopChatbot: () => ipcRenderer.invoke('stop-chatbot'), + sendChatMessage: (message) => ipcRenderer.invoke('send-chat-message', message), + + // Event listeners for Chatbot events + onChatbotResponse: (callback) => ipcRenderer.on('chatbot-response', callback), + onChatbotError: (callback) => ipcRenderer.on('chatbot-error', callback), + onChatbotStatus: (callback) => ipcRenderer.on('chatbot-status', callback), + + // Remove event listeners + removeAllListeners: (channel) => ipcRenderer.removeAllListeners(channel), // GitHub integration createGithubIssue: (issueData) => ipcRenderer.invoke('create-github-issue', issueData), diff --git a/src/renderer/index.html b/src/renderer/index.html index 20c8465..6278963 100644 --- a/src/renderer/index.html +++ b/src/renderer/index.html @@ -19,6 +19,7 @@ + @@ -76,6 +77,10 @@
💬 AnythingLLM Assistant
+Ask questions about your meeting or anything else!
+Start a meeting to see transcript...
'; + } + this.transcriptData = []; + } + + /** + * Escape HTML to prevent XSS + */ + escapeHtml(text) { + const div = document.createElement('div'); + div.textContent = text; + return div.innerHTML; + } + /** * Initialize mock transcript for UI development */ diff --git a/src/renderer/styles/chat.css b/src/renderer/styles/chat.css new file mode 100644 index 0000000..2a4b08f --- /dev/null +++ b/src/renderer/styles/chat.css @@ -0,0 +1,209 @@ +/* Chat Tab Styles */ +.chat-container { + display: flex; + flex-direction: column; + height: 100%; + min-height: 400px; +} + +.chat-messages { + flex: 1; + overflow-y: auto; + padding: 1rem; + background: var(--bg-secondary); + border-radius: 8px; + margin-bottom: 1rem; + max-height: 400px; + border: 1px solid var(--border-primary); +} + +.chat-welcome { + text-align: center; + color: var(--text-secondary); + padding: 2rem; +} + +.chat-welcome p:first-child { + font-size: 1.2rem; + font-weight: 600; + color: var(--text-primary); + margin-bottom: 0.5rem; +} + +.chat-subtitle { + font-size: 0.9rem; + margin: 0; +} + +.chat-message { + margin-bottom: 1rem; + padding: 0.75rem; + border-radius: 8px; + max-width: 85%; + word-wrap: break-word; +} + +.chat-message.user { + background: var(--primary-color); + color: white; + margin-left: auto; + text-align: right; +} + +.chat-message.assistant { + background: var(--bg-tertiary); + color: var(--text-primary); + border: 1px solid var(--border-primary); +} + +.chat-message.error { + background: #fee; + color: #c33; + border: 1px solid #fcc; +} + +.chat-message.streaming { + background: var(--bg-tertiary); + color: var(--text-primary); + border: 1px solid var(--primary-color); + position: relative; +} + +.chat-message.streaming::after { + content: '●'; + animation: pulse 1.5s ease-in-out infinite; + color: var(--primary-color); + margin-left: 0.5rem; +} + +@keyframes pulse { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.3; } +} + +.chat-input-container { + display: flex; + gap: 0.5rem; + 
padding: 0.75rem; + background: var(--bg-primary); + border: 1px solid var(--border-primary); + border-radius: 8px; +} + +.chat-input { + flex: 1; + padding: 0.75rem; + border: 1px solid var(--border-secondary); + border-radius: 6px; + background: var(--bg-secondary); + color: var(--text-primary); + font-size: 0.9rem; + outline: none; + transition: border-color 0.2s ease; +} + +.chat-input:focus { + border-color: var(--primary-color); + box-shadow: 0 0 0 2px rgba(var(--primary-color-rgb), 0.1); +} + +.chat-input:disabled { + background: var(--bg-disabled); + color: var(--text-disabled); + cursor: not-allowed; +} + +.chat-send-btn { + padding: 0.75rem 1.5rem; + background: var(--primary-color); + color: white; + border: none; + border-radius: 6px; + font-weight: 500; + cursor: pointer; + transition: all 0.2s ease; + white-space: nowrap; +} + +.chat-send-btn:hover:not(:disabled) { + background: var(--primary-hover); + transform: translateY(-1px); +} + +.chat-send-btn:disabled { + background: var(--bg-disabled); + color: var(--text-disabled); + cursor: not-allowed; + transform: none; +} + +.chat-status { + padding: 0.5rem; + text-align: center; + margin-top: 0.5rem; +} + +.chat-status-text { + font-size: 0.8rem; + padding: 0.25rem 0.75rem; + border-radius: 12px; + display: inline-block; +} + +.chat-status-text.connected { + background: #d4edda; + color: #155724; + border: 1px solid #c3e6cb; +} + +.chat-status-text.connecting { + background: #fff3cd; + color: #856404; + border: 1px solid #ffeaa7; +} + +.chat-status-text.disconnected { + background: #f8d7da; + color: #721c24; + border: 1px solid #f5c6cb; +} + +.chat-status-text.error { + background: #f8d7da; + color: #721c24; + border: 1px solid #f5c6cb; +} + +/* Scrollbar Styles for Chat Messages */ +.chat-messages::-webkit-scrollbar { + width: 6px; +} + +.chat-messages::-webkit-scrollbar-track { + background: var(--bg-primary); + border-radius: 3px; +} + +.chat-messages::-webkit-scrollbar-thumb { + background: 
var(--border-secondary); + border-radius: 3px; +} + +.chat-messages::-webkit-scrollbar-thumb:hover { + background: var(--border-primary); +} + +/* Responsive Adjustments */ +@media (max-width: 768px) { + .chat-message { + max-width: 95%; + } + + .chat-input-container { + padding: 0.5rem; + } + + .chat-send-btn { + padding: 0.75rem 1rem; + } +} \ No newline at end of file diff --git a/src/renderer/styles/components.css b/src/renderer/styles/components.css index 261e2f2..e0beb6c 100644 --- a/src/renderer/styles/components.css +++ b/src/renderer/styles/components.css @@ -7,6 +7,70 @@ /* Additional component styles can be added here as needed */ +/* Transcript Entry Styles */ +.transcript-entry { + display: flex; + flex-direction: column; + padding: 8px 12px; + margin-bottom: 8px; + background: #1a1a1a; + border-radius: 6px; + border-left: 3px solid #22d3ee; + transition: background-color 0.2s ease; +} + +.transcript-entry:hover { + background: #242424; +} + +.transcript-timestamp { + font-size: 11px; + color: #9ca3af; + font-family: 'JetBrains Mono', monospace; + margin-bottom: 4px; +} + +.transcript-text { + color: #e5e7eb; + line-height: 1.4; + font-size: 13px; +} + +/* Transcript Content Container */ +.transcript-content { + max-height: 400px; + overflow-y: auto; + padding: 16px; + background: #111; + border-radius: 8px; + border: 1px solid #333; +} + +.transcript-content:empty::before { + content: "Transcripts will appear here when the meeting starts..."; + color: #6b7280; + font-style: italic; +} + +/* Custom scrollbar for transcript content */ +.transcript-content::-webkit-scrollbar { + width: 6px; +} + +.transcript-content::-webkit-scrollbar-track { + background: #1f1f1f; + border-radius: 3px; +} + +.transcript-content::-webkit-scrollbar-thumb { + background: #404040; + border-radius: 3px; +} + +.transcript-content::-webkit-scrollbar-thumb:hover { + background: #525252; +} + /* Focus states for accessibility */ .control-btn:focus, .tab-btn:focus { diff --git 
a/src/renderer/styles/main.css b/src/renderer/styles/main.css index 36591fe..4e861f3 100644 --- a/src/renderer/styles/main.css +++ b/src/renderer/styles/main.css @@ -320,8 +320,12 @@ html, body { border: none; background: transparent; width: auto; - min-width: auto; + min-width: 0; max-width: none; + display: flex; + align-items: center; + justify-content: center; + text-align: center; } /* Post-meeting tabs get special styling */ @@ -373,6 +377,8 @@ html, body { .tab-icon { font-size: 16px; + flex-shrink: 0; + margin-right: 4px; } .tab-count { @@ -384,6 +390,8 @@ html, body { font-weight: 600; min-width: 18px; text-align: center; + flex-shrink: 0; + margin-left: 4px; } .tab-btn.active .tab-count { @@ -1612,6 +1620,93 @@ html, body { } } +/* Chat Tab Styles */ +.chat-container { + display: flex; + flex-direction: column; + height: 100%; + padding: 12px; +} + +.chat-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 16px; + padding-bottom: 12px; + border-bottom: 1px solid #333; +} + +.chat-header h3 { + font-size: 18px; + font-weight: 600; + color: #ffffff; + margin: 0; +} + +.chat-controls { + display: flex; + gap: 8px; +} + +.chat-messages { + flex: 1; + overflow-y: auto; + padding: 12px; + background: #111; + border: 1px solid #333; + border-radius: 6px; + margin-bottom: 12px; + max-height: 400px; + scrollbar-width: thin; + scrollbar-color: #374151 #1a1a1a; +} + +.chat-input-container { + display: flex; + gap: 8px; + align-items: center; +} + +#chat-input { + flex: 1; + padding: 10px 12px; + background: #111; + border: 1px solid #333; + border-radius: 6px; + color: #ffffff; + font-size: 14px; +} + +#chat-input:focus { + outline: none; + border-color: #3b82f6; + box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.3); +} + +#chat-input:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.chat-messages::-webkit-scrollbar { + width: 6px; +} + +.chat-messages::-webkit-scrollbar-track { + background: #1a1a1a; + border-radius: 
3px; +} + +.chat-messages::-webkit-scrollbar-thumb { + background: #374151; + border-radius: 3px; +} + +.chat-messages::-webkit-scrollbar-thumb:hover { + background: #4b5563; +} + /* Scrollbar styling */ ::-webkit-scrollbar { width: 6px; diff --git a/src/services/chatbotService.js b/src/services/chatbotService.js new file mode 100644 index 0000000..9635bcb --- /dev/null +++ b/src/services/chatbotService.js @@ -0,0 +1,372 @@ +/** + * AnythingLLM Chatbot Service + * + * This service manages the AnythingLLM chatbot process by spawning + * a Python child process and handling real-time communication. + * Similar pattern to whisperService.js + */ + +const { spawn } = require('child_process'); +const path = require('path'); +const fs = require('fs'); + +class ChatbotService { + constructor() { + this.chatbotProcess = null; + this.isRunning = false; + this.onResponseCallback = null; + this.onErrorCallback = null; + this.onStatusCallback = null; + this.messageQueue = []; + this.isProcessingMessage = false; + } + + /** + * Start the chatbot service + */ + async start() { + if (this.isRunning) { + throw new Error('Chatbot service is already running'); + } + + try { + // Check if Python is available + const pythonPath = await this.findPython(); + if (!pythonPath) { + throw new Error('Python not found. 
Please ensure Python is installed and in PATH'); + } + + // Check if required files exist + this.checkRequiredFiles(); + + // Start the chatbot process + const scriptPath = path.join(__dirname, '..', '..', 'whisper', 'anythingLLM', 'chatbot_client.py'); + const workingDir = path.join(__dirname, '..', '..', 'whisper', 'anythingLLM'); + + console.log('Starting AnythingLLM chatbot process...'); + console.log('Script path:', scriptPath); + console.log('Working directory:', workingDir); + + this.chatbotProcess = spawn(pythonPath, [scriptPath], { + cwd: workingDir, + stdio: ['pipe', 'pipe', 'pipe'] + }); + + this.isRunning = true; + this.setupProcessHandlers(); + + // Give the process a moment to start + await new Promise(resolve => setTimeout(resolve, 1000)); + + console.log('Chatbot service started successfully'); + + if (this.onStatusCallback) { + this.onStatusCallback('started'); + } + + return true; + + } catch (error) { + console.error('Failed to start chatbot service:', error); + this.isRunning = false; + + if (this.onErrorCallback) { + this.onErrorCallback(error); + } + + throw error; + } + } + + /** + * Stop the chatbot service + */ + async stop() { + if (!this.isRunning || !this.chatbotProcess) { + return; + } + + console.log('Stopping chatbot service...'); + + try { + // Send quit command + if (this.chatbotProcess.stdin && !this.chatbotProcess.stdin.destroyed) { + this.chatbotProcess.stdin.write('quit\n'); + this.chatbotProcess.stdin.end(); + } + + // Wait for graceful shutdown + await new Promise((resolve) => { + const timeout = setTimeout(() => { + if (this.chatbotProcess) { + this.chatbotProcess.kill('SIGTERM'); + } + resolve(); + }, 3000); + + this.chatbotProcess.on('close', () => { + clearTimeout(timeout); + resolve(); + }); + }); + + } catch (error) { + console.error('Error during chatbot service shutdown:', error); + } finally { + this.isRunning = false; + this.chatbotProcess = null; + this.messageQueue = []; + this.isProcessingMessage = false; + + if 
(this.onStatusCallback) { + this.onStatusCallback('stopped'); + } + + console.log('Chatbot service stopped'); + } + } + + /** + * Set the live transcript file path + */ + setLiveTranscriptFile(filePath) { + this.liveTranscriptFile = filePath; + console.log('Live transcript file set:', filePath); + } + + /** + * Send a message to the chatbot with intelligent context switching + */ + async sendMessage(message, stream = false, useRAG = false) { + if (!this.isRunning || !this.chatbotProcess) { + throw new Error('Chatbot service is not running'); + } + + // Determine which mode to use + const command = this._determineCommand(message, stream, useRAG); + const data = JSON.stringify({ + command, + message, + transcript_file: this.liveTranscriptFile + }); + + return new Promise((resolve, reject) => { + this.messageQueue.push({ data, resolve, reject, stream }); + this.processMessageQueue(); + }); + } + + /** + * Determine which command to use based on message type and preferences + */ + _determineCommand(message, stream, useRAG) { + if (stream) { + return 'stream'; + } + + // Use RAG for complex analytical questions + if (useRAG || this._isComplexQuery(message)) { + return 'chat_rag'; + } + + // Use live context for recent/immediate questions + return 'chat'; + } + + /** + * Determine if a query requires RAG capabilities + */ + _isComplexQuery(message) { + const complexKeywords = [ + 'summarize', 'summary', 'overview', 'analyze', 'analysis', + 'compare', 'contrast', 'trend', 'pattern', 'insight', + 'decision', 'conclusion', 'recommendation', 'action item', + 'meeting notes', 'key points', 'takeaway' + ]; + + const lowerMessage = message.toLowerCase(); + return complexKeywords.some(keyword => lowerMessage.includes(keyword)); + } + + /** + * Process the message queue + */ + processMessageQueue() { + if (this.isProcessingMessage || this.messageQueue.length === 0) { + return; + } + + this.isProcessingMessage = true; + const { data, resolve, reject, stream } = 
this.messageQueue.shift(); + + let responseBuffer = ''; + let streamBuffer = []; + const timeout = setTimeout(() => { + reject(new Error('Message timeout')); + this.isProcessingMessage = false; + this.processMessageQueue(); + }, 30000); + + const handleResponse = (chunk) => { + try { + const lines = chunk.toString().split('\n').filter(line => line.trim()); + + for (const line of lines) { + const parsed = JSON.parse(line); + + if (parsed.type === 'response') { + clearTimeout(timeout); + this.isProcessingMessage = false; + resolve(parsed.data); + this.processMessageQueue(); + return; + } + + if (parsed.type === 'stream_chunk') { + streamBuffer.push(parsed.data); + if (this.onResponseCallback) { + this.onResponseCallback(parsed.data, true); + } + } + + if (parsed.type === 'stream_end') { + clearTimeout(timeout); + this.isProcessingMessage = false; + resolve(streamBuffer.join('')); + this.processMessageQueue(); + return; + } + + if (parsed.type === 'error') { + clearTimeout(timeout); + this.isProcessingMessage = false; + reject(new Error(parsed.data)); + this.processMessageQueue(); + return; + } + } + } catch (error) { + // Accumulate response if not valid JSON yet + responseBuffer += chunk.toString(); + } + }; + + this.chatbotProcess.stdout.once('data', handleResponse); + this.chatbotProcess.stdin.write(data + '\n'); + } + + /** + * Set up process event handlers + */ + setupProcessHandlers() { + this.chatbotProcess.stdout.on('data', (data) => { + // Handle streaming responses in processMessageQueue + }); + + this.chatbotProcess.stderr.on('data', (data) => { + console.error('Chatbot stderr:', data.toString()); + if (this.onErrorCallback) { + this.onErrorCallback(new Error(data.toString())); + } + }); + + this.chatbotProcess.on('close', (code) => { + console.log('Chatbot process closed with code:', code); + this.isRunning = false; + this.chatbotProcess = null; + + if (this.onStatusCallback) { + this.onStatusCallback('closed'); + } + }); + + 
this.chatbotProcess.on('error', (error) => { + console.error('Chatbot process error:', error); + this.isRunning = false; + + if (this.onErrorCallback) { + this.onErrorCallback(error); + } + }); + } + + /** + * Check if required files exist + */ + checkRequiredFiles() { + const scriptPath = path.join(__dirname, '..', '..', 'whisper', 'anythingLLM', 'chatbot_client.py'); + const configPath = path.join(__dirname, '..', '..', 'whisper', 'anythingLLM', 'config.yaml'); + + if (!fs.existsSync(scriptPath)) { + throw new Error(`Chatbot script not found: ${scriptPath}`); + } + + if (!fs.existsSync(configPath)) { + throw new Error(`Chatbot config not found: ${configPath}. Please create and configure it.`); + } + } + + /** + * Find Python executable + */ + async findPython() { + const { spawn } = require('child_process'); + + const pythonCommands = ['python', 'python3', 'py']; + + for (const cmd of pythonCommands) { + try { + const result = await new Promise((resolve) => { + const proc = spawn(cmd, ['--version'], { stdio: 'pipe' }); + proc.on('close', (code) => { + resolve(code === 0 ? 
cmd : null); + }); + proc.on('error', () => resolve(null)); + }); + + if (result) { + return result; + } + } catch (error) { + continue; + } + } + + return null; + } + + /** + * Set callback for responses + */ + setOnResponseCallback(callback) { + this.onResponseCallback = callback; + } + + /** + * Set callback for errors + */ + setOnErrorCallback(callback) { + this.onErrorCallback = callback; + } + + /** + * Set callback for status changes + */ + setOnStatusCallback(callback) { + this.onStatusCallback = callback; + } + + /** + * Get service status + */ + getStatus() { + return { + isRunning: this.isRunning, + hasProcess: !!this.chatbotProcess, + queueLength: this.messageQueue.length, + isProcessing: this.isProcessingMessage + }; + } +} + +module.exports = ChatbotService; \ No newline at end of file diff --git a/src/services/whisperService.js b/src/services/whisperService.js new file mode 100644 index 0000000..f44f9c9 --- /dev/null +++ b/src/services/whisperService.js @@ -0,0 +1,343 @@ +/** + * Whisper Transcription Service + * + * This service manages the Whisper AI transcription process by spawning + * a Python child process and handling real-time communication. + */ + +const { spawn } = require('child_process'); +const path = require('path'); +const fs = require('fs'); + +class WhisperService { + constructor() { + this.transcriptionProcess = null; + this.isRunning = false; + this.transcriptBuffer = []; + this.onTranscriptCallback = null; + this.onErrorCallback = null; + this.onStatusCallback = null; + } + + /** + * Start the Whisper transcription service + */ + async start() { + if (this.isRunning) { + throw new Error('Whisper service is already running'); + } + + try { + // Check if Python is available + const pythonPath = await this.findPython(); + if (!pythonPath) { + throw new Error('Python not found. 
/**
 * Whisper Transcription Service
 *
 * Manages the Whisper AI transcription pipeline by spawning a Python child
 * process (meeting_transcriber.py) and consuming its newline-delimited JSON
 * output in real time.
 */

const { spawn } = require('child_process');
const path = require('path');
const fs = require('fs');

class WhisperService {
  constructor() {
    this.transcriptionProcess = null;   // spawned Python child process, or null
    this.isRunning = false;
    this.transcriptBuffer = [];         // parsed JSON events, capped at ~1000
    this.onTranscriptCallback = null;   // (transcriptEvent) => void
    this.onErrorCallback = null;        // (errorString) => void
    this.onStatusCallback = null;       // (statusEvent) => void
  }

  /**
   * Start the Whisper transcription service.
   *
   * @returns {Promise<boolean>} true once the child process is up.
   * @throws if Python or any required model/config file is missing.
   */
  async start() {
    if (this.isRunning) {
      throw new Error('Whisper service is already running');
    }

    try {
      // Check if Python is available
      const pythonPath = await this.findPython();
      if (!pythonPath) {
        throw new Error('Python not found. Please ensure Python is installed and in PATH');
      }

      // Check if required files exist
      this.checkRequiredFiles();

      // Start the transcription process using new meeting transcriber
      const scriptPath = path.join(__dirname, '..', '..', 'whisper', 'meeting_transcriber.py');
      const workingDir = path.join(__dirname, '..', '..', 'whisper');

      console.log('Starting Whisper transcription process...');
      console.log('Script path:', scriptPath);
      console.log('Working directory:', workingDir);

      this.transcriptionProcess = spawn(pythonPath, [scriptPath], {
        cwd: workingDir,
        stdio: ['pipe', 'pipe', 'pipe']
      });

      this.isRunning = true;
      this.setupProcessHandlers();

      // Give the process a moment to start
      await new Promise(resolve => setTimeout(resolve, 1000));

      console.log('Whisper service started successfully');
      return true;

    } catch (error) {
      console.error('Failed to start Whisper service:', error);
      this.isRunning = false;
      throw error;
    }
  }

  /**
   * Stop the Whisper transcription service.
   *
   * BUG FIX: the previous version never cleared the 5-second force-kill
   * timer when the process exited normally (leaving a dangling timer that
   * kept the event loop alive), and resolve() could fire twice ('exit' then
   * timeout). A settled-guard plus clearTimeout fixes both.
   */
  async stop() {
    if (!this.isRunning || !this.transcriptionProcess) {
      return;
    }

    try {
      console.log('Stopping Whisper service...');

      // Send SIGINT to gracefully stop the process
      this.transcriptionProcess.kill('SIGINT');

      // Wait for the process to exit; escalate to SIGKILL after 5 s.
      await new Promise((resolve) => {
        let settled = false;
        const settle = () => {
          if (!settled) {
            settled = true;
            clearTimeout(forceKillTimer);
            resolve();
          }
        };

        const forceKillTimer = setTimeout(() => {
          if (this.transcriptionProcess && !this.transcriptionProcess.killed) {
            this.transcriptionProcess.kill('SIGKILL');
          }
          settle();
        }, 5000);

        this.transcriptionProcess.once('exit', settle);
      });

      this.transcriptionProcess = null;
      this.isRunning = false;
      console.log('Whisper service stopped');

    } catch (error) {
      console.error('Error stopping Whisper service:', error);
      this.isRunning = false;
    }
  }

  /**
   * Get the full transcript as formatted text ("[timestamp] text" lines).
   */
  getFullTranscript() {
    let transcript = '';
    for (const entry of this.transcriptBuffer) {
      if (entry.type === 'transcript') {
        transcript += `[${entry.timestamp}] ${entry.text}\n`;
      }
    }
    return transcript;
  }

  /**
   * Save the transcript to a file.
   *
   * @param {string|null} filename - full path to write; defaults to
   *   meetingnotes_<timestamp>.txt in the user's home directory.
   * @returns {Promise<string>} the path written.
   * @throws if there is no transcript content to save.
   */
  async saveTranscript(filename = null) {
    const transcript = this.getFullTranscript();
    if (!transcript.trim()) {
      throw new Error('No transcript to save');
    }

    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const defaultFilename = `meetingnotes_${timestamp}.txt`;
    const filepath = filename || path.join(require('os').homedir(), defaultFilename);

    await fs.promises.writeFile(filepath, transcript, 'utf8');
    console.log(`Transcript saved to: ${filepath}`);
    return filepath;
  }

  /** Clear the transcript buffer. */
  clearTranscript() {
    this.transcriptBuffer = [];
  }

  /** Set callback for new transcriptions. */
  onTranscript(callback) {
    this.onTranscriptCallback = callback;
  }

  /** Set callback for errors. */
  onError(callback) {
    this.onErrorCallback = callback;
  }

  /** Set callback for status updates. */
  onStatus(callback) {
    this.onStatusCallback = callback;
  }

  /**
   * Find a Python executable: prefer the bundled virtualenv, then probe
   * common command names on PATH.
   * @returns {Promise<string|null>} executable path/command, or null.
   */
  async findPython() {
    // First try virtual environment (Windows venv layout — TODO confirm
    // behavior on macOS/Linux where the path would be bin/python).
    const venvPython = path.join(__dirname, '..', '..', 'whisper_env', 'Scripts', 'python.exe');
    if (fs.existsSync(venvPython)) {
      console.log(`Found Python in virtual environment: ${venvPython}`);
      return venvPython;
    }

    // Fallback to system Python
    const candidates = ['python', 'python3', 'py'];

    for (const candidate of candidates) {
      try {
        const { spawn } = require('child_process');
        const result = await new Promise((resolve) => {
          const proc = spawn(candidate, ['--version'], { stdio: 'pipe' });
          let output = '';

          proc.stdout.on('data', (data) => {
            output += data.toString();
          });

          proc.stderr.on('data', (data) => {
            output += data.toString();
          });

          proc.on('close', (code) => {
            resolve({ code, output });
          });

          proc.on('error', () => {
            resolve({ code: -1, output: '' });
          });
        });

        if (result.code === 0 && result.output.includes('Python')) {
          console.log(`Found Python: ${candidate} (${result.output.trim()})`);
          return candidate;
        }
      } catch (error) {
        // Continue to next candidate
        continue;
      }
    }

    return null;
  }

  /**
   * Check that every script, config, and model file Whisper needs exists.
   */
  checkRequiredFiles() {
    const requiredFiles = [
      'whisper/transcriber_for_nodejs.py',
      'whisper/standalone_model.py',
      'whisper/standalone_whisper.py',
      'whisper/config.yaml',
      'whisper/mel_filters.npz',
      'whisper/models/WhisperEncoder.onnx',
      'whisper/models/WhisperDecoder.onnx'
    ];

    const workingDir = path.join(__dirname, '..', '..');

    for (const file of requiredFiles) {
      const filePath = path.join(workingDir, file);
      if (!fs.existsSync(filePath)) {
        throw new Error(`Required file not found: ${file}`);
      }
    }

    console.log('All required Whisper files found');
  }

  /**
   * Setup handlers for the transcription process.
   */
  setupProcessHandlers() {
    if (!this.transcriptionProcess) return;

    // Handle stdout (JSON transcription data)
    this.transcriptionProcess.stdout.on('data', (data) => {
      const lines = data.toString().split('\n').filter(line => line.trim());

      for (const line of lines) {
        try {
          const parsed = JSON.parse(line);
          this.handleTranscriptionData(parsed);
        } catch (error) {
          // Non-JSON output, treat as plain text
          console.log('Whisper output:', line);
        }
      }
    });

    // Handle stderr
    this.transcriptionProcess.stderr.on('data', (data) => {
      console.error('Whisper error:', data.toString());
      if (this.onErrorCallback) {
        this.onErrorCallback(data.toString());
      }
    });

    // Handle process exit
    this.transcriptionProcess.on('close', (code) => {
      console.log(`Whisper process exited with code ${code}`);
      this.isRunning = false;
      this.transcriptionProcess = null;
    });

    // Handle process errors
    this.transcriptionProcess.on('error', (error) => {
      console.error('Whisper process error:', error);
      this.isRunning = false;
      if (this.onErrorCallback) {
        this.onErrorCallback(error.message);
      }
    });
  }

  /**
   * Handle one parsed JSON event from the Python process and dispatch it
   * to the appropriate callback.
   */
  handleTranscriptionData(data) {
    // Add to buffer
    this.transcriptBuffer.push(data);

    // Limit buffer size to prevent memory issues
    if (this.transcriptBuffer.length > 1000) {
      this.transcriptBuffer = this.transcriptBuffer.slice(-800);
    }

    // Call appropriate callback based on data type
    switch (data.type) {
      case 'transcript':
        console.log(`[${data.timestamp}] Transcript: ${data.text}`);
        if (this.onTranscriptCallback) {
          // Include transcript file path in the callback data
          const callbackData = {
            ...data,
            transcriptFile: data.transcriptFile
          };
          this.onTranscriptCallback(callbackData);
        }
        break;

      case 'status':
        console.log(`[${data.timestamp}] Status: ${data.message}`);
        if (this.onStatusCallback) {
          this.onStatusCallback(data);
        }
        break;

      case 'error':
        console.error(`[${data.timestamp}] Error: ${data.error}`);
        if (this.onErrorCallback) {
          this.onErrorCallback(data.error);
        }
        break;

      default:
        console.log('Unknown data type:', data);
    }
  }

  /**
   * Check if the service is running (process alive and not killed).
   */
  isServiceRunning() {
    return this.isRunning && this.transcriptionProcess && !this.transcriptionProcess.killed;
  }
}

module.exports = WhisperService;
"""Standalone live transcription using ONNX Whisper models.

Records microphone audio, chunks it, and transcribes each chunk with a
thread pool so recording and inference overlap. Run directly:
``python LiveTranscriber_standalone.py`` (Ctrl+C to stop).
"""

import numpy as np
import os
import queue
import sounddevice as sd
import sys
import threading
import yaml
import traceback

from concurrent.futures import ThreadPoolExecutor

# Add this script's directory to sys.path so sibling modules import cleanly
# regardless of the caller's working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
if current_dir not in sys.path:
    sys.path.insert(0, current_dir)

# Handle imports for both direct Python execution and PyInstaller
try:
    from standalone_model import StandaloneWhisperModel
except ImportError:
    from .standalone_model import StandaloneWhisperModel


def flush_output() -> None:
    """Force flush stdout and stderr for better console output in executables."""
    sys.stdout.flush()
    sys.stderr.flush()


def process_transcription(
    whisper_model: StandaloneWhisperModel,
    chunk: np.ndarray,
    silence_threshold: float,
    sample_rate: int
) -> None:
    """Transcribe one audio chunk and print the result.

    Runs in a worker thread. Chunks whose mean absolute amplitude is below
    ``silence_threshold`` are skipped so silence is not sent to the model.

    Args:
        whisper_model: loaded Whisper model wrapper.
        chunk: mono float32 audio samples.
        silence_threshold: minimum mean |amplitude| to bother transcribing.
        sample_rate: sample rate of ``chunk`` in Hz.
    """
    try:
        if np.abs(chunk).mean() > silence_threshold:
            transcript = whisper_model.transcribe(chunk, sample_rate)
            if transcript.strip():
                print(f"Transcript: {transcript}")
                flush_output()
    except Exception as e:
        print(f"[ERROR] Error in transcription: {e}")
        traceback.print_exc()
        flush_output()


def process_audio(
    whisper_model: StandaloneWhisperModel,
    audio_queue: queue.Queue,
    stop_event: threading.Event,
    max_workers: int,
    queue_timeout: float,
    chunk_samples: int,
    silence_threshold: float,
    sample_rate: int
) -> None:
    """Drain the audio queue, slice it into fixed-size chunks, and submit
    each chunk to a thread pool for transcription.

    Loops until ``stop_event`` is set, then waits for in-flight
    transcription futures to finish.
    """
    # Rolling buffer of samples not yet long enough to form a full chunk.
    buffer = np.empty((0,), dtype=np.float32)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []

        while not stop_event.is_set():
            try:
                audio_chunk = audio_queue.get(timeout=queue_timeout)
                audio_chunk = audio_chunk.flatten()
                buffer = np.concatenate([buffer, audio_chunk])

                # Emit every complete chunk currently in the buffer.
                while len(buffer) >= chunk_samples:
                    current_chunk = buffer[:chunk_samples]
                    buffer = buffer[chunk_samples:]

                    future = executor.submit(
                        process_transcription,
                        whisper_model,
                        current_chunk,
                        silence_threshold,
                        sample_rate
                    )
                    # Prune completed futures so the list stays bounded.
                    futures = [f for f in futures if not f.done()] + [future]

            except queue.Empty:
                # Timeout: just re-check stop_event.
                continue
            except Exception as e:
                print(f"[ERROR] Error in audio processing: {e}")
                traceback.print_exc()
                flush_output()

        # Wait for transcription futures to complete
        for future in futures:
            try:
                future.result()
            except Exception as e:
                print(f"[ERROR] Error in future result: {e}")
                flush_output()


def record_audio(
    audio_queue: queue.Queue,
    stop_event: threading.Event,
    sample_rate: int,
    channels: int
) -> None:
    """Record audio from the microphone and put it into the audio queue.

    Blocks until ``stop_event`` is set; the sounddevice stream invokes
    ``audio_callback`` from its own thread for every captured block.
    """

    def audio_callback(indata, frames, time, status):
        """Callback function for audio input stream."""
        if not stop_event.is_set():
            # Copy: sounddevice reuses the indata buffer between callbacks.
            audio_queue.put(indata.copy())

    try:
        with sd.InputStream(
            samplerate=sample_rate,
            channels=channels,
            callback=audio_callback
        ):
            print("[AUDIO] Microphone stream initialized... (Press Ctrl+C to stop)")
            print("=" * 50)
            flush_output()
            stop_event.wait()
    except Exception as e:
        print(f"[ERROR] Error in audio recording: {e}")
        traceback.print_exc()
        flush_output()


class StandaloneLiveTranscriber:
    """Wires together config loading, model loading, and the record /
    process threads for live transcription."""

    def __init__(self):
        print("[INIT] Starting Standalone Whisper Transcription")
        flush_output()

        try:
            # BUG FIX: config.yaml was opened relative to the process CWD,
            # so launching the script from any other directory failed.
            # Anchor it to the script's own directory instead.
            config_path = os.path.join(current_dir, "config.yaml")
            with open(config_path, "r") as f:
                config = yaml.safe_load(f)

            print("[CONFIG] Configuration loaded successfully")
            flush_output()

            # audio settings
            self.sample_rate = config.get("sample_rate", 16000)      # Hz
            self.chunk_duration = config.get("chunk_duration", 4)    # seconds per chunk
            self.channels = config.get("channels", 1)

            # processing settings
            self.max_workers = config.get("max_workers", 4)
            self.silence_threshold = config.get("silence_threshold", 0.001)
            self.queue_timeout = config.get("queue_timeout", 1.0)
            self.chunk_samples = int(self.sample_rate * self.chunk_duration)

            # model paths (NOTE: resolved relative to the CWD when not
            # absolute, as before — config files typically use paths
            # relative to the whisper directory the script is run from)
            self.encoder_path = config.get("encoder_path", "models/WhisperEncoder.onnx")
            self.decoder_path = config.get("decoder_path", "models/WhisperDecoder.onnx")

            # check that the model paths exist
            if not os.path.exists(self.encoder_path):
                print(f"[ERROR] Encoder model not found at {self.encoder_path}")
                flush_output()
                sys.exit(f"Encoder model not found at {self.encoder_path}.")

            if not os.path.exists(self.decoder_path):
                print(f"[ERROR] Decoder model not found at {self.decoder_path}")
                flush_output()
                sys.exit(f"Decoder model not found at {self.decoder_path}.")

            print("[FILES] Model files found")
            flush_output()

            # initialize the model
            print("🤖 Loading Standalone Whisper model...")
            flush_output()

            self.model = StandaloneWhisperModel(self.encoder_path, self.decoder_path)

            print("[MODEL] Model loaded successfully!")
            flush_output()

            # initialize the audio queue and stop event
            self.audio_queue = queue.Queue()
            self.stop_event = threading.Event()

        except Exception as e:
            print(f"[ERROR] Error during initialization: {e}")
            traceback.print_exc()
            flush_output()
            sys.exit(1)

    def run(self):
        """Run the live transcription until interrupted (Ctrl+C)."""
        try:
            # launch the audio processing and recording threads
            process_thread = threading.Thread(
                target=process_audio,
                args=(
                    self.model,
                    self.audio_queue,
                    self.stop_event,
                    self.max_workers,
                    self.queue_timeout,
                    self.chunk_samples,
                    self.silence_threshold,
                    self.sample_rate
                )
            )
            process_thread.start()

            record_thread = threading.Thread(
                target=record_audio,
                args=(
                    self.audio_queue,
                    self.stop_event,
                    self.sample_rate,
                    self.channels
                )
            )
            record_thread.start()

            # Poll-join so KeyboardInterrupt is delivered to the main thread.
            try:
                while True:
                    record_thread.join(timeout=0.1)
                    if not record_thread.is_alive():
                        break
            except KeyboardInterrupt:
                print("\nStopping transcription...")
                flush_output()
            finally:
                self.stop_event.set()
                record_thread.join()
                process_thread.join()

        except Exception as e:
            print(f"[ERROR] Error during execution: {e}")
            traceback.print_exc()
            flush_output()


if __name__ == "__main__":
    transcriber = StandaloneLiveTranscriber()
    transcriber.run()
valid + + Returns: + dict: {"success": bool, "message": str, "data": dict} + """ + auth_url = base_url + "/auth" + + headers = { + "accept": "application/json", + "Authorization": "Bearer " + api_key + } + + try: + auth_response = requests.get(auth_url, headers=headers) + + if auth_response.status_code == 200: + return { + "success": True, + "message": "Authentication successful", + "data": auth_response.json() + } + else: + return { + "success": False, + "message": f"Authentication failed with status {auth_response.status_code}", + "data": auth_response.json() if auth_response.text else {} + } + except Exception as e: + return { + "success": False, + "message": f"Authentication error: {str(e)}", + "data": {} + } + +def test_auth(config_path=None): + """Test authentication with config file""" + if config_path is None: + config_path = Path(__file__).parent / "config.yaml" + + try: + with open(config_path, "r") as file: + config = yaml.safe_load(file) + + api_key = config["api_key"] + base_url = config["model_server_base_url"] + + return auth(api_key, base_url) + + except Exception as e: + return { + "success": False, + "message": f"Config error: {str(e)}", + "data": {} + } + +if __name__ == "__main__": + result = test_auth() + print(result) \ No newline at end of file diff --git a/whisper/anythingLLM/chatbot_client.py b/whisper/anythingLLM/chatbot_client.py new file mode 100644 index 0000000..bf13387 --- /dev/null +++ b/whisper/anythingLLM/chatbot_client.py @@ -0,0 +1,260 @@ +import requests +import yaml +import json +import sys +import asyncio +import httpx +from pathlib import Path + +class ChatbotClient: + def __init__(self, config_path=None): + if config_path is None: + config_path = Path(__file__).parent / "config.yaml" + + with open(config_path, "r") as file: + config = yaml.safe_load(file) + + self.api_key = config["api_key"] + self.base_url = config["model_server_base_url"] + self.stream = config["stream"] + self.stream_timeout = config["stream_timeout"] + 
self.workspace_slug = config["workspace_slug"] + + if self.stream: + self.chat_url = f"{self.base_url}/v1/workspace/{self.workspace_slug}/stream-chat" + else: + self.chat_url = f"{self.base_url}/v1/workspace/{self.workspace_slug}/chat" + + self.headers = { + "accept": "application/json", + "Content-Type": "application/json", + "Authorization": "Bearer " + self.api_key + } + + # Live transcript file path for immediate context + self.live_transcript_file = None + self.max_context_chars = 8000 # Limit context to prevent token overflow + + def set_live_transcript_file(self, file_path: str): + """Set the live transcript file path for immediate context""" + self.live_transcript_file = file_path + + def _get_live_context(self) -> str: + """Read live transcript file for immediate context""" + if not self.live_transcript_file: + return "" + + try: + with open(self.live_transcript_file, 'r', encoding='utf-8') as f: + content = f.read() + + # Limit context size to prevent token overflow + if len(content) > self.max_context_chars: + lines = content.split('\n') + # Keep the header and recent lines + header_lines = [] + content_lines = [] + in_header = True + + for line in lines: + if in_header and (line.startswith('=') or not line.strip()): + header_lines.append(line) + if line.startswith('='): + in_header = False + else: + content_lines.append(line) + + # Take recent transcript lines that fit within limit + header_text = '\n'.join(header_lines) + remaining_chars = self.max_context_chars - len(header_text) + + recent_lines = [] + char_count = 0 + for line in reversed(content_lines): + if char_count + len(line) + 1 <= remaining_chars: + recent_lines.insert(0, line) + char_count += len(line) + 1 + else: + break + + content = header_text + '\n' + '\n'.join(recent_lines) + + return content + except Exception as e: + print(f"Error reading live transcript: {e}") + return "" + + def chat(self, message: str) -> str: + """ + Send a chat request in non-streaming mode. 
+ Uses live transcript for immediate context. + """ + # Get live context + live_context = self._get_live_context() + + # Enhance message with context if available + if live_context.strip(): + enhanced_message = f"""Meeting Context (Live Transcript): +{live_context} + +User Question: {message}""" + else: + enhanced_message = message + + data = { + "message": enhanced_message, + "mode": "query" + } + try: + chat_response = requests.post( + self.chat_url, + headers=self.headers, + json=data + ) + response_text = chat_response.text.strip() + + # Handle streaming response format + if response_text.startswith('data: '): + response_text = response_text[6:].strip() + + response_data = json.loads(response_text) + return response_data.get('textResponse', str(response_data)) + except ValueError: + return f"Response is not valid JSON. Raw response: {chat_response.text if 'chat_response' in locals() else 'No response'}" + except Exception as e: + return f"Chat request failed. Error: {e}" + + def chat_with_rag(self, message: str) -> str: + """ + Send a chat request using only AnythingLLM's RAG capabilities. + This uses embedded documents for semantic search. + """ + data = { + "message": message, + "mode": "query" + } + try: + chat_response = requests.post( + self.chat_url, + headers=self.headers, + json=data + ) + response_text = chat_response.text.strip() + + # Handle streaming response format + if response_text.startswith('data: '): + response_text = response_text[6:].strip() + + response_data = json.loads(response_text) + return response_data.get('textResponse', str(response_data)) + except ValueError: + return f"Response is not valid JSON. Raw response: {chat_response.text if 'chat_response' in locals() else 'No response'}" + except Exception as e: + return f"Chat request failed. 
Error: {e}" + + def streaming_chat(self, message: str): + """ + Generator for streaming chat responses + """ + data = { + "message": message, + "mode": "query" + } + + response_text = "" + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + async def async_stream(): + buffer = "" + try: + async with httpx.AsyncClient(timeout=self.stream_timeout) as client: + async with client.stream("POST", self.chat_url, headers=self.headers, json=data) as response: + async for chunk in response.aiter_text(): + if chunk: + buffer += chunk + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + if line.startswith("data: "): + line = line[len("data: "):] + try: + parsed_chunk = json.loads(line.strip()) + yield parsed_chunk.get("textResponse", "") + except json.JSONDecodeError: + continue + except Exception as e: + yield f"Error processing chunk: {e}" + except httpx.RequestError as e: + yield f"Streaming chat request failed. Error: {e}" + + agen = async_stream() + try: + while True: + chunk = loop.run_until_complete(agen.__anext__()) + response_text += chunk + yield response_text + except StopAsyncIteration: + pass + finally: + loop.close() + yield response_text + +def main(): + """CLI interface for testing""" + client = ChatbotClient() + + try: + while True: + line = input().strip() + if not line: + continue + + if line.lower() == 'quit': + break + + # Parse JSON input + try: + data = json.loads(line) + command = data.get('command') + message = data.get('message') + transcript_file = data.get('transcript_file') + + # Set live transcript file if provided + if transcript_file: + client.set_live_transcript_file(transcript_file) + + if command == 'chat': + response = client.chat(message) + print(json.dumps({"type": "response", "data": response})) + sys.stdout.flush() + + elif command == 'chat_rag': + response = client.chat_with_rag(message) + print(json.dumps({"type": "response", "data": response})) + sys.stdout.flush() + + elif command == 'stream': + 
print(json.dumps({"type": "stream_start"})) + sys.stdout.flush() + + for chunk in client.streaming_chat(message): + print(json.dumps({"type": "stream_chunk", "data": chunk})) + sys.stdout.flush() + + print(json.dumps({"type": "stream_end"})) + sys.stdout.flush() + + except json.JSONDecodeError: + print(json.dumps({"type": "error", "data": "Invalid JSON input"})) + sys.stdout.flush() + except Exception as e: + print(json.dumps({"type": "error", "data": str(e)})) + sys.stdout.flush() + + except KeyboardInterrupt: + pass + except EOFError: + pass + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/whisper/anythingLLM/config.yaml b/whisper/anythingLLM/config.yaml new file mode 100644 index 0000000..1adcc3a --- /dev/null +++ b/whisper/anythingLLM/config.yaml @@ -0,0 +1,8 @@ +# AnythingLLM Configuration +# Copy this from your AnythingLLM build and update with your settings + +api_key: "EVKQEJK-81B40VM-KXZTMPV-GSJQFWT" +model_server_base_url: "http://localhost:3001/api" +workspace_slug: "testing123" +stream: true +stream_timeout: 30 \ No newline at end of file diff --git a/whisper/anythingLLM/workspace_utils.py b/whisper/anythingLLM/workspace_utils.py new file mode 100644 index 0000000..0cc4c17 --- /dev/null +++ b/whisper/anythingLLM/workspace_utils.py @@ -0,0 +1,65 @@ +import requests +import yaml +from pathlib import Path + +def workspaces(api_key: str, base_url: str) -> dict: + """ + Get available workspaces info + + Returns: + dict: {"success": bool, "message": str, "data": list} + """ + workspaces_url = base_url + "/workspaces" + + headers = { + "accept": "application/json", + "Content-Type": "application/json", + "Authorization": "Bearer " + api_key + } + + try: + workspaces_response = requests.get(workspaces_url, headers=headers) + + if workspaces_response.status_code == 200: + return { + "success": True, + "message": "Workspaces retrieved successfully", + "data": workspaces_response.json() + } + else: + return { + "success": False, + 
"message": f"Failed to get workspaces with status {workspaces_response.status_code}", + "data": workspaces_response.json() if workspaces_response.text else {} + } + except Exception as e: + return { + "success": False, + "message": f"Workspaces error: {str(e)}", + "data": {} + } + +def get_workspaces(config_path=None): + """Get workspaces with config file""" + if config_path is None: + config_path = Path(__file__).parent / "config.yaml" + + try: + with open(config_path, "r") as file: + config = yaml.safe_load(file) + + api_key = config["api_key"] + base_url = config["model_server_base_url"] + + return workspaces(api_key, base_url) + + except Exception as e: + return { + "success": False, + "message": f"Config error: {str(e)}", + "data": {} + } + +if __name__ == "__main__": + result = get_workspaces() + print(result) \ No newline at end of file diff --git a/whisper/config.yaml b/whisper/config.yaml new file mode 100644 index 0000000..61e5f8d --- /dev/null +++ b/whisper/config.yaml @@ -0,0 +1,13 @@ +# audio settings +sample_rate: 16000 # Audio sample rate in Hz +chunk_duration: 4 # Duration of each audio chunk in seconds +channels: 1 # Number of audio channels (1 for mono) + +# processing settings +max_workers: 4 # Number of parallel transcription workers +silence_threshold: 0.001 # Threshold for silence detection +queue_timeout: 1.0 # Timeout for audio queue operations + +# model paths +encoder_path: "models/WhisperEncoder.onnx" +decoder_path: "models/WhisperDecoder.onnx" \ No newline at end of file diff --git a/whisper/meeting_transcriber.py b/whisper/meeting_transcriber.py new file mode 100644 index 0000000..6cdfaed --- /dev/null +++ b/whisper/meeting_transcriber.py @@ -0,0 +1,451 @@ +import numpy as np +import os +import queue +import sys +import threading +import time +import yaml +import requests +import json +from datetime import datetime +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path +from dataclasses import dataclass +from 
typing import List, Optional, Callable
+
+# Add chatbot path for AnythingLLM client
+chatbot_path = Path("C:/Users/qc_de/local/simple-npu-chatbot/src")
+if str(chatbot_path) not in sys.path:
+    sys.path.insert(0, str(chatbot_path))
+
+# Import existing components
+from standalone_model import StandaloneWhisperModel
+
+
+@dataclass
+class TranscriptSegment:
+    """A segment of transcribed text with timestamp"""
+    text: str
+    timestamp: datetime
+    confidence: float = 0.0
+
+
+class AnythingLLMClient:
+    """Client for AnythingLLM API with document upload capabilities"""
+
+    def __init__(self, config_path: str = None):
+        config_path = config_path or str(chatbot_path / "config.yaml")
+
+        with open(config_path, "r") as f:
+            config = yaml.safe_load(f)
+
+        self.api_key = config["api_key"]
+        self.base_url = config["model_server_base_url"]
+        self.workspace_slug = config["workspace_slug"]
+
+        self.headers = {
+            "accept": "application/json",
+            "Authorization": f"Bearer {self.api_key}"
+        }
+
+        self.chat_headers = {
+            **self.headers,
+            "Content-Type": "application/json"
+        }
+
+        print(f"Connected to AnythingLLM workspace: {self.workspace_slug}")
+
+    def upload_transcript_document(self, content: str, filename: str) -> bool:
+        """Upload transcript content as a document to AnythingLLM"""
+        try:
+            # Create temporary file for upload
+            temp_file_path = f"temp_{filename}.txt"
+            with open(temp_file_path, 'w', encoding='utf-8') as f:
+                f.write(content)
+
+            # Upload file to AnythingLLM
+            upload_url = f"{self.base_url}/document/upload"
+
+            with open(temp_file_path, 'rb') as f:
+                files = {'file': (filename + '.txt', f, 'text/plain')}
+                response = requests.post(upload_url, headers=self.headers, files=files)
+
+            # Clean up temp file
+            os.remove(temp_file_path)
+
+            if response.status_code == 200:
+                result = response.json()
+                print(f"[UPLOAD] Document uploaded: {filename}")
+
+                # Add document to workspace
+                self.add_document_to_workspace(result.get('location'))
+                return True
+            else:
print(f"[ERROR] Upload failed: {response.status_code}") + return False + + except Exception as e: + print(f"[ERROR] Document upload error: {e}") + return False + + def add_document_to_workspace(self, document_location: str) -> bool: + """Add an uploaded document to the workspace""" + try: + add_url = f"{self.base_url}/workspace/{self.workspace_slug}/update-embeddings" + + data = { + "adds": [document_location] + } + + response = requests.post(add_url, headers=self.chat_headers, json=data) + + if response.status_code == 200: + print(f"[SUCCESS] Document added to workspace") + return True + else: + print(f"[ERROR] Failed to add to workspace: {response.status_code}") + return False + + except Exception as e: + print(f"[ERROR] Add to workspace error: {e}") + return False + + def chat(self, message: str, session_id: str = None) -> str: + """Send a chat message to AnythingLLM""" + try: + chat_url = f"{self.base_url}/workspace/{self.workspace_slug}/chat" + + data = { + "message": message, + "mode": "chat", + "sessionId": session_id or f"meeting-{int(time.time())}", + "attachments": [] + } + + response = requests.post(chat_url, headers=self.chat_headers, json=data) + + if response.status_code == 200: + return response.json().get('textResponse', 'No response received') + else: + return f"Error: HTTP {response.status_code}" + + except Exception as e: + return f"Chat error: {e}" + + +class MeetingTranscriber: + """Main class for live meeting transcription with AnythingLLM integration""" + + def __init__(self, whisper_config_path: str = "config.yaml", llm_config_path: str = None): + self.session_id = f"meeting_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + + # Load whisper configuration + self._load_whisper_config(whisper_config_path) + + # Initialize components + self.whisper_model = StandaloneWhisperModel(self.encoder_path, self.decoder_path) + self.llm_client = AnythingLLMClient(llm_config_path) + + # Audio processing + self.audio_queue = queue.Queue() + self.stop_event = 
threading.Event() + + # Transcript management + self.transcript_segments: List[TranscriptSegment] = [] + self.transcript_buffer = [] + self.buffer_size = 10 # Segments to buffer before uploading + + # Live transcript file for real-time access + self.live_transcript_file = f"../meetingnotes_{datetime.now().strftime('%Y-%m-%dT%H-%M-%S-%f')[:-3]}Z.txt" + self.transcript_lock = threading.Lock() + + # Initialize live transcript file + with open(self.live_transcript_file, 'w', encoding='utf-8') as f: + f.write(f"Meeting Session: {self.session_id}\n") + f.write(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") + f.write("=" * 60 + "\n\n") + + # Callbacks + self.on_transcript_callback: Optional[Callable[[str], None]] = None + + print(f"[INIT] Meeting Transcriber initialized (Session: {self.session_id})") + print(f"[TRANSCRIPT] Live transcript: {self.live_transcript_file}") + + def _load_whisper_config(self, config_path: str): + """Load whisper configuration""" + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + self.sample_rate = config.get("sample_rate", 16000) + self.chunk_duration = config.get("chunk_duration", 4) + self.channels = config.get("channels", 1) + self.max_workers = config.get("max_workers", 4) + self.silence_threshold = config.get("silence_threshold", 0.001) + self.queue_timeout = config.get("queue_timeout", 1.0) + self.chunk_samples = int(self.sample_rate * self.chunk_duration) + + self.encoder_path = config.get("encoder_path", "models/WhisperEncoder.onnx") + self.decoder_path = config.get("decoder_path", "models/WhisperDecoder.onnx") + + # Verify model files + for path in [self.encoder_path, self.decoder_path]: + if not os.path.exists(path): + raise FileNotFoundError(f"Model file not found: {path}") + + def set_transcript_callback(self, callback: Callable[[str], None]): + """Set callback for real-time transcript updates""" + self.on_transcript_callback = callback + + def start_meeting(self): + """Start the meeting 
transcription""" + print("[START] Starting meeting transcription...") + + # Start threads + record_thread = threading.Thread(target=self._record_audio, daemon=True) + process_thread = threading.Thread(target=self._process_audio, daemon=True) + + record_thread.start() + process_thread.start() + + return record_thread, process_thread + + def stop_meeting(self): + """Stop transcription and finalize meeting""" + print("[STOP] Stopping meeting...") + self.stop_event.set() + + # Upload any remaining transcript buffer + if self.transcript_buffer: + self._upload_transcript_batch() + + # Upload full meeting transcript + self._upload_final_transcript() + + def ask_question(self, question: str) -> str: + """Ask a question about the meeting using AnythingLLM's RAG""" + return self.llm_client.chat(question, self.session_id) + + def generate_meeting_notes(self) -> str: + """Generate meeting notes using AnythingLLM""" + prompt = """ + Based on our meeting conversation, please generate comprehensive meeting notes including: + 1. Key discussion points and topics covered + 2. Important decisions made + 3. Action items and next steps + 4. Main takeaways and conclusions + + Format the response as structured meeting notes. 
+ """ + return self.llm_client.chat(prompt, self.session_id) + + def _record_audio(self): + """Record audio from microphone""" + import sounddevice as sd + + def audio_callback(indata, frames, time, status): + if not self.stop_event.is_set(): + self.audio_queue.put(indata.copy()) + + try: + with sd.InputStream( + samplerate=self.sample_rate, + channels=self.channels, + callback=audio_callback + ): + print("[AUDIO] Microphone active - recording started") + self.stop_event.wait() + except Exception as e: + print(f"[ERROR] Audio recording error: {e}") + + def _process_audio(self): + """Process audio chunks and generate transcripts""" + buffer = np.empty((0,), dtype=np.float32) + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + while not self.stop_event.is_set(): + try: + audio_chunk = self.audio_queue.get(timeout=self.queue_timeout) + audio_chunk = audio_chunk.flatten() + buffer = np.concatenate([buffer, audio_chunk]) + + while len(buffer) >= self.chunk_samples: + current_chunk = buffer[:self.chunk_samples] + buffer = buffer[self.chunk_samples:] + + executor.submit(self._transcribe_chunk, current_chunk) + + except queue.Empty: + continue + except Exception as e: + print(f"[ERROR] Audio processing error: {e}") + + def _transcribe_chunk(self, audio_chunk: np.ndarray): + """Transcribe a single audio chunk""" + try: + if np.abs(audio_chunk).mean() > self.silence_threshold: + transcript_text = self.whisper_model.transcribe(audio_chunk, self.sample_rate) + + if transcript_text.strip(): + segment = TranscriptSegment( + text=transcript_text.strip(), + timestamp=datetime.now() + ) + + # Add to segments and buffer + self.transcript_segments.append(segment) + self.transcript_buffer.append(segment) + + # Write to live transcript file immediately + self._write_to_live_transcript(segment) + + # Output for Node.js integration + timestamp_str = segment.timestamp.strftime('%H:%M:%S') + transcript_data = { + "timestamp": timestamp_str, + "text": segment.text, + 
"type": "transcript", + "transcriptFile": os.path.abspath(self.live_transcript_file) + } + + # Output JSON for Node.js + print(json.dumps(transcript_data)) + sys.stdout.flush() + + # Call callback + if self.on_transcript_callback: + self.on_transcript_callback(f"[{timestamp_str}]: {segment.text}") + + # Upload batch if buffer is full + if len(self.transcript_buffer) >= self.buffer_size: + self._upload_transcript_batch() + + except Exception as e: + error_data = { + "type": "error", + "error": str(e), + "timestamp": datetime.now().strftime('%H:%M:%S') + } + print(json.dumps(error_data)) + sys.stdout.flush() + + def _write_to_live_transcript(self, segment: TranscriptSegment): + """Write segment to live transcript file immediately""" + try: + with self.transcript_lock: + with open(self.live_transcript_file, 'a', encoding='utf-8') as f: + timestamp_str = segment.timestamp.strftime('%H:%M:%S') + f.write(f"[{timestamp_str}]: {segment.text}\n") + f.flush() # Ensure immediate write to disk + except Exception as e: + print(f"[ERROR] Live transcript write error: {e}") + + def get_live_transcript_path(self): + """Get the path to the live transcript file""" + return self.live_transcript_file + + def _upload_transcript_batch(self): + """Upload current transcript buffer to AnythingLLM""" + if not self.transcript_buffer: + return + + try: + # Create batch content + batch_content = f"Meeting Transcript Batch - {datetime.now().strftime('%H:%M:%S')}\n" + batch_content += "=" * 60 + "\n\n" + + for segment in self.transcript_buffer: + timestamp_str = segment.timestamp.strftime('%H:%M:%S') + batch_content += f"[{timestamp_str}]: {segment.text}\n" + + # Upload to AnythingLLM + batch_filename = f"{self.session_id}_batch_{len(self.transcript_segments)}" + success = self.llm_client.upload_transcript_document(batch_content, batch_filename) + + if success: + print(f"[UPLOAD] Uploaded batch with {len(self.transcript_buffer)} segments") + + # Clear buffer + self.transcript_buffer = [] + + 
except Exception as e: + print(f"[ERROR] Batch upload error: {e}") + + def _upload_final_transcript(self): + """Upload complete meeting transcript""" + if not self.transcript_segments: + return + + try: + # Create full transcript + full_content = f"Complete Meeting Transcript - {self.session_id}\n" + full_content += f"Meeting Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n" + full_content += f"Duration: {len(self.transcript_segments)} segments\n" + full_content += "=" * 80 + "\n\n" + + for segment in self.transcript_segments: + timestamp_str = segment.timestamp.strftime('%H:%M:%S') + full_content += f"[{timestamp_str}]: {segment.text}\n" + + # Upload final transcript + filename = f"{self.session_id}_complete_transcript" + success = self.llm_client.upload_transcript_document(full_content, filename) + + if success: + print(f"[UPLOAD] Final transcript uploaded with {len(self.transcript_segments)} segments") + + except Exception as e: + print(f"[ERROR] Final transcript upload error: {e}") + + +def main(): + """Main function to run the meeting transcriber""" + transcriber = MeetingTranscriber() + + try: + # Start meeting + threads = transcriber.start_meeting() + + print("\n" + "="*70) + print("[ACTIVE] ANYTHINGLLM MEETING TRANSCRIBER ACTIVE") + print(" • Live transcription streaming to your workspace") + print(" • RAG-powered context available for questions") + print(" • Type 'ask: