calmmage
diff --git a/‎.github/workflows/main.yml
+2-1 b/‎.github/workflows/main.yml
+2-1
diff --git a/‎.gitignore
+1-1 b/‎.gitignore
+1-1
diff --git a/‎dev/michael_example.py ‎archive/dev/michael_example.py
+1-2 b/‎dev/michael_example.py ‎archive/dev/michael_example.py
+1-2
diff --git a/‎archive/dev/todo.md
+55 b/‎archive/dev/todo.md
+55
diff --git a/‎dev/work_with_audio.ipynb ‎archive/dev/work_with_audio.ipynb b/‎dev/work_with_audio.ipynb ‎archive/dev/work_with_audio.ipynb
diff --git a/‎archive/useless/run.py
+16 b/‎archive/useless/run.py
+16
diff --git a/‎tests/test_app.py ‎archive/useless/tests/test_app.py b/‎tests/test_app.py ‎archive/useless/tests/test_app.py
diff --git a/‎tests/test_bot.py ‎archive/useless/tests/test_bot.py b/‎tests/test_bot.py ‎archive/useless/tests/test_bot.py
diff --git a/‎archive/whisper_bot/__init__.py b/‎archive/whisper_bot/__init__.py
diff --git a/‎whisper_bot/core/__init__.py ‎archive/whisper_bot/core/__init__.py
+1-1 b/‎whisper_bot/core/__init__.py ‎archive/whisper_bot/core/__init__.py
+1-1
diff --git a/‎whisper_bot/core/app.py ‎archive/whisper_bot/core/app.py
+4-3 b/‎whisper_bot/core/app.py ‎archive/whisper_bot/core/app.py
+4-3
diff --git a/‎whisper_bot/core/app_config.py ‎archive/whisper_bot/core/app_config.py b/‎whisper_bot/core/app_config.py ‎archive/whisper_bot/core/app_config.py
diff --git a/‎whisper_bot/core/telegram_bot.py ‎archive/whisper_bot/core/telegram_bot.py
+43-25 b/‎whisper_bot/core/telegram_bot.py ‎archive/whisper_bot/core/telegram_bot.py
+43-25
diff --git a/‎whisper_bot/utils/text_utils.py ‎archive/whisper_bot/utils/text_utils.py
+2-3 b/‎whisper_bot/utils/text_utils.py ‎archive/whisper_bot/utils/text_utils.py
+2-3
@@ -15,13 +15,14 @@ jobs:
         #         3.7 doesn't support f"{a=}" syntax
         #        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
         #         don't really need anything older than 3.10
-        python-version: [ "3.10", "3.11" ]
+        python-version: [ "3.12" ]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
+          # TODO: add a step that installs ffmpeg; the tests presumably need it
       - name: Install Poetry
         run: |
           curl -sSL https://install.python-poetry.org | python3 -
 
@@ -157,4 +157,4 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
@@ -1,6 +1,5 @@
-import pickle
-
 import openai
+import pickle
 from loguru import logger
 from pydub import AudioSegment
 from pydub.silence import detect_silence
 
@@ -0,0 +1,55 @@
+# Section 1 - improving existing bot
+
+## feature 1.1 - improve logging
+
+- [ ] Delete the 'generating' message from Telegram once finished
+- [ ] Track generation time and add it to the log
+- [ ] Report progress and remaining time to Telegram
+    - [ ] Can I have a text-based tqdm?
+    - [ ] I can probably send updates every 5 seconds
+- [ ] For sequential mode, report the text generated so far
+
+## feature 1.2 - add parallel processing
+
+- [ ] Report processing mode in the status message
+- [ ] Calculate expected processing time
+- [ ] use parallel mode in the bot
+
+## feature 1.3 - add config settings commands
+
+- [ ] s1 add a command to configure parallel mode
+- [ ] s1 add parallel mode to config class
+- [ ] s4 add automatic command generation for all config fields
+
+# Section 2 - developing new features
+
+## feature 2.1 - gpt summary
+
+- [ ] Generate punctuation with gpt
+
+# Section 3 - common features (for bot base)
+
+## feature 3.1 - gpt utils
+
+- [ ] extract gpt utils to separate file
+- [ ] extract gpt utils to a separate lib
+
+# Section 4 - unsorted
+
+## feature 0.2 - gpt fill/extract the info
+
+## feature 0.3 - gpt guess speakers?
+
+## feature 3 - add retries
+
+- On file download
+- On openai api
+    - Option 1: decorator
+    - Option 2: write a custom method
+
+## feature 5 - (TBD)
+
+## feature 6 - add video support
+
+- [ ] Detect and download video from Telegram (add handler)
+- [ ] Extract audio from video
@@ -0,0 +1,16 @@
+# Entry point: load the environment, connect to the database, set up logging, then run WhisperApp.
+from bot_base.data_model.mongo_utils import connect_to_db
+from bot_base.utils.logging_utils import setup_logger
+from dotenv import load_dotenv
+from whisper_bot.core.app import WhisperApp
+
+if __name__ == "__main__":
+    load_dotenv()
+    # Connect to the database (presumably uses settings loaded by load_dotenv() above - confirm).
+    connect_to_db()
+
+    # Configure logging before the app is created.
+    setup_logger()
+
+    app = WhisperApp()
+    app.run()
@@ -1,3 +1,3 @@
+from .app import WhisperApp
 from .app_config import WhisperAppConfig
 from .telegram_bot import WhisperTelegramBot
-from .app import WhisperApp
@@ -3,15 +3,14 @@
 - Database for storing processed whispers and logs
 -
 """
-from functools import partial
-
 from bot_base.core import App
 from bot_base.utils.gpt_utils import (
     split_by_weight,
     get_token_count,
     amap_gpt_command,
     token_limit_by_model,
 )
+from functools import partial
 from whisper_bot.core import WhisperAppConfig
 from whisper_bot.core import WhisperTelegramBot
 from whisper_bot.core.app_config import WhisperTelegramBotConfig
@@ -37,7 +36,9 @@ async def merge_and_format_chunks(self, chunks):
         )
         # step 2: merge chunks in each group
         self.logger.info(f"Merging chunks in each group")
-        merged_groups = [merge_all_chunks(group, logger=self.logger) for group in groups]
+        merged_groups = [
+            merge_all_chunks(group, logger=self.logger) for group in groups
+        ]
         # step 3: format each group
         self.logger.info(f"Formatting each group")
         formatted_groups = await amap_gpt_command(
 
@@ -1,14 +1,15 @@
-from datetime import datetime
-from textwrap import dedent
-from typing import TYPE_CHECKING
-
 from aiogram import F
 from aiogram import types
-
 from bot_base.core import mark_command
 from bot_base.core.telegram_bot import TelegramBot
-from bot_base.utils.text_utils import DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP, \
-    split_text_with_overlap
+from bot_base.utils.text_utils import (
+    DEFAULT_CHUNK_SIZE,
+    DEFAULT_CHUNK_OVERLAP,
+    split_text_with_overlap,
+)
+from datetime import datetime
+from textwrap import dedent
+from typing import TYPE_CHECKING
 from whisper_bot.core.app_config import WhisperTelegramBotConfig
 from whisper_bot.utils.text_utils import (
     merge_all_chunks,
@@ -43,11 +44,14 @@ async def process_audio(self, message: types.Message):
         raw_transcript = "\n\n".join(chunks)
         self.logger.info(f"Raw transcript", data=raw_transcript)
         if self.config.send_raw_transcript:
-            filename = f"raw_transcript_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
+            filename = (
+                f"raw_transcript_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
+            )
             await self.send_safe(
-                chat_id=message.chat.id, text=raw_transcript,
+                chat_id=message.chat.id,
+                text=raw_transcript,
                 reply_to_message_id=message.message_id,
-                filename=filename
+                filename=filename,
             )
 
         # subsplit large chunks
@@ -58,8 +62,10 @@ async def process_audio(self, message: types.Message):
             self.logger.info("Transcript", data=transcript)
             filename = f"transcript_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
             await self.send_safe(
-                chat_id=message.chat.id, text=transcript, reply_to_message_id=message.message_id,
-                filename=filename
+                chat_id=message.chat.id,
+                text=transcript,
+                reply_to_message_id=message.message_id,
+                filename=filename,
             )
 
         await placeholder.delete()
@@ -90,8 +96,9 @@ async def chat_message_handler(self, message: types.Message):
         return message_text
 
     @staticmethod
-    def subsplit_large_chunks(chunks, chunk_limit=DEFAULT_CHUNK_SIZE,
-                              overlap=DEFAULT_CHUNK_OVERLAP):
+    def subsplit_large_chunks(
+        chunks, chunk_limit=DEFAULT_CHUNK_SIZE, overlap=DEFAULT_CHUNK_OVERLAP
+    ):
         res_chunks = []
         for chunk in chunks:
             res_chunks += split_text_with_overlap(chunk, chunk_limit, overlap)
@@ -107,7 +114,8 @@ async def merge_chunks_command(self, message: types.Message):
         text = await self._extract_text_from_message(message)
 
         # remove command from text
-        if text.startswith("/"):  _, text = text.split(maxsplit=1)
+        if text.startswith("/"):
+            _, text = text.split(maxsplit=1)
 
         # step 2: split chunks
         chunks = text.split("\n\n")
@@ -122,8 +130,10 @@ async def merge_chunks_command(self, message: types.Message):
         # send back the result
         filename = f"merged_text_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
         await self.send_safe(
-            chat_id=message.chat.id, text=result, reply_to_message_id=message.message_id,
-            filename=filename
+            chat_id=message.chat.id,
+            text=result,
+            reply_to_message_id=message.message_id,
+            filename=filename,
         )
 
     # ------------------------------------------------------------
@@ -141,7 +151,8 @@ async def format_text_command(self, message: types.Message):
         text = await self._extract_text_from_message(message)
 
         # remove command from text
-        if text.startswith("/"):  _, text = text.split(maxsplit=1)
+        if text.startswith("/"):
+            _, text = text.split(maxsplit=1)
 
         # format the text
         result = await format_text_with_gpt(
@@ -155,8 +166,10 @@ async def format_text_command(self, message: types.Message):
         # send back the result
         filename = f"formatted_text_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
         await self.send_safe(
-            chat_id=message.chat.id, text=result, reply_to_message_id=message.message_id,
-            filename=filename
+            chat_id=message.chat.id,
+            text=result,
+            reply_to_message_id=message.message_id,
+            filename=filename,
         )
 
     @mark_command("fix_grammar")
@@ -167,7 +180,8 @@ async def fix_grammar_command(self, message: types.Message):
         text = await self._extract_text_from_message(message)
 
         # remove command from text
-        if text.startswith("/"):  _, text = text.split(maxsplit=1)
+        if text.startswith("/"):
+            _, text = text.split(maxsplit=1)
 
         # format the text
         result = await format_text_with_gpt(
@@ -181,8 +195,10 @@ async def fix_grammar_command(self, message: types.Message):
         # send back the result
         filename = f"formatted_text_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
         await self.send_safe(
-            chat_id=message.chat.id, text=result, reply_to_message_id=message.message_id,
-            filename=filename
+            chat_id=message.chat.id,
+            text=result,
+            reply_to_message_id=message.message_id,
+            filename=filename,
         )
 
     # ------------------------------------------------------------
@@ -208,8 +224,10 @@ async def merge_and_format_command(self, message: types.Message):
         # send back the result
         filename = f"formatted_text_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
         await self.send_safe(
-            chat_id=message.chat.id, text=result, reply_to_message_id=message.message_id,
-            filename=filename
+            chat_id=message.chat.id,
+            text=result,
+            reply_to_message_id=message.message_id,
+            filename=filename,
         )
 
     # ------------------------------------------------------------
 
@@ -1,12 +1,11 @@
 import loguru
-from difflib import SequenceMatcher
-from typing import Iterable
-
 from bot_base.utils.gpt_utils import (
     arun_command_with_gpt,
     token_limit_by_model,
     get_token_count,
 )
+from difflib import SequenceMatcher
+from typing import Iterable
 
 
 def normalize_text(text):