From 912e0167c149cb98cb657d97348d4263fb989415 Mon Sep 17 00:00:00 2001 From: jkroll Date: Mon, 23 Mar 2026 14:17:13 +0100 Subject: [PATCH] improve timestamp handling in transcripts --- prompt.txt | 4 ++-- scripts/create_viral_segments.py | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/prompt.txt b/prompt.txt index cab3dad..0b3a6da 100644 --- a/prompt.txt +++ b/prompt.txt @@ -5,8 +5,8 @@ Your goal is to extract segments that have **Perfect Narrative Completeness** (S ### INPUT FORMAT EXPLAINED The transcript below is a continuous text stream with embedded **Time Tags** like `(12s)`. -- Example: `"Hello world (0s). Today we are going to (3s) fly to the moon."` -- These tags represent the approximate timestamp of the *preceding* text. +- Example: `"(0s) Hello world. Today we are going to (3s) fly to the moon."` +- These tags represent the timestamp of the word following the tag. - Use them to calculate duration. Duration = (End Tag - Start Tag). ### STRICT VIRAL RULES (The "ViralCutter Standard"): diff --git a/scripts/create_viral_segments.py b/scripts/create_viral_segments.py index cee04b2..d557398 100644 --- a/scripts/create_viral_segments.py +++ b/scripts/create_viral_segments.py @@ -181,7 +181,7 @@ def preprocess_transcript_for_ai(segments): # Try to start with (0s) based on first segment first_start = segments[0].get('start', 0) - full_text += f"({int(first_start)}s) " + full_text += f"({first_start:.2f}s) " last_tag_time = first_start for seg in segments: @@ -190,9 +190,8 @@ def preprocess_transcript_for_ai(segments): full_text += text + " " - if end_time - last_tag_time >= 4: - full_text += f"({int(end_time)}s) " - last_tag_time = end_time + full_text += f"({int(end_time)}s) " + last_tag_time = end_time return full_text.strip()