From 912e0167c149cb98cb657d97348d4263fb989415 Mon Sep 17 00:00:00 2001
From: jkroll <j-kroll@gmx.de>
Date: Mon, 23 Mar 2026 14:17:13 +0100
Subject: [PATCH] improve timestamp handling in transcripts

---
 prompt.txt                       | 4 ++--
 scripts/create_viral_segments.py | 7 +++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/prompt.txt b/prompt.txt
index cab3dad..0b3a6da 100644
--- a/prompt.txt
+++ b/prompt.txt
@@ -5,8 +5,8 @@ Your goal is to extract segments that have **Perfect Narrative Completeness** (S
 
 ### INPUT FORMAT EXPLAINED
 The transcript below is a continuous text stream with embedded **Time Tags** like `(12s)`.
-- Example: `"Hello world (0s). Today we are going to (3s) fly to the moon."`
-- These tags represent the approximate timestamp of the *preceding* text.
+- Example: `"(0s) Hello world. Today we are going to (3s) fly to the moon."`
+- Each tag gives the approximate timestamp (in seconds) of the text that immediately follows it.
 - Use them to calculate duration. Duration = (End Tag - Start Tag).
 
 ### STRICT VIRAL RULES (The "ViralCutter Standard"):
diff --git a/scripts/create_viral_segments.py b/scripts/create_viral_segments.py
index cee04b2..d557398 100644
--- a/scripts/create_viral_segments.py
+++ b/scripts/create_viral_segments.py
@@ -181,7 +181,7 @@ def preprocess_transcript_for_ai(segments):
     
     # Try to start with (0s) based on first segment
     first_start = segments[0].get('start', 0)
-    full_text += f"({int(first_start)}s) "
+    full_text += f"({first_start:.2f}s) "
     last_tag_time = first_start
 
     for seg in segments:
@@ -190,9 +190,8 @@ def preprocess_transcript_for_ai(segments):
         
         full_text += text + " "
         
-        if end_time - last_tag_time >= 4:
-            full_text += f"({int(end_time)}s) "
-            last_tag_time = end_time
+        full_text += f"({int(end_time)}s) "
+        last_tag_time = end_time
 
     return full_text.strip()