roboflow · eminaruk · Sep 19, 2025 · Sep 19, 2025
@@ -30,11 +30,46 @@ def resolve_source(source_path: str) -> np.ndarray | None:
 
     image = cv2.imread(source_path)
     if image is not None:
-        return image
+        return resize_to_fit_screen(image)
 
     frame_generator = sv.get_video_frames_generator(source_path=source_path)
     frame = next(frame_generator)
-    return frame
+    return resize_to_fit_screen(frame)
+
+
+def resize_to_fit_screen(
+    image: np.ndarray, max_width: int = 1200, max_height: int = 800
+) -> np.ndarray:
+    """
+    Downscale an image so it fits inside a bounding box, keeping aspect ratio.
+
+    The image is never upscaled: if it already fits within
+    ``max_width`` x ``max_height`` the very same array object is returned.
+    When a resize happens, a message with the old and new resolution is
+    printed to stdout.
+
+    Args:
+        image: Input image; only ``image.shape[:2]`` (height, width) is read.
+        max_width: Maximum width for display, in pixels.
+        max_height: Maximum height for display, in pixels.
+
+    Returns:
+        The input image itself when no downscaling is needed (or when it has
+        a zero-sized dimension), otherwise a new array produced by
+        ``cv2.resize`` with ``cv2.INTER_AREA`` interpolation.
+    """
+    height, width = image.shape[:2]
+
+    # An empty image cannot be scaled; returning it as-is avoids a
+    # ZeroDivisionError in the scale computation below.
+    if width == 0 or height == 0:
+        return image
+
+    # The tighter of the two constraints wins; the 1.0 cap prevents upscaling.
+    scale = min(max_width / width, max_height / height, 1.0)
+    if scale >= 1.0:
+        return image
+
+    # Truncation can reach 0 for extreme aspect ratios, and cv2.resize rejects
+    # a zero-sized target, so keep every side at least one pixel long.
+    new_width = max(1, int(width * scale))
+    new_height = max(1, int(height * scale))
+    resized = cv2.resize(
+        image, (new_width, new_height), interpolation=cv2.INTER_AREA
+    )
+    print(
+        f"Video resolution resized from {width}x{height} -> {new_width}x{new_height}"
+    )
+    return resized
 
 
 def mouse_event(event: int, x: int, y: int, flags: int, param: Any) -> None:
@@ -119,8 +154,46 @@ def redraw_polygons(image: np.ndarray) -> None:
             )
 
 
-def save_polygons_to_json(polygons, target_path):
+def convert_coordinates_to_original(polygons, original_size, display_size):
+    """
+    Convert coordinates from display size back to original video size.
+
+    Each point is scaled independently on the x and y axes by the ratio
+    between the original and the display dimensions, then rounded to the
+    nearest integer pixel. Empty polygons are dropped from the result.
+
+    Args:
+        polygons: List of polygons; each polygon is a sequence of (x, y)
+            points expressed in display coordinates
+        original_size: (width, height) of original video
+        display_size: (width, height) of display window
+
+    Returns:
+        List of polygons with original coordinates, each point stored as a
+        two-element ``[x, y]`` list of ints
+
+    Raises:
+        ZeroDivisionError: If a display dimension is 0 and there is at least
+            one point to convert.
+    """
+    orig_w, orig_h = original_size
+    disp_w, disp_h = display_size
+
+    # Multiply before dividing and round to nearest: plain int() truncation
+    # turns floating-point results such as 29.999... into 29 and always
+    # biases points toward the origin.
+    return [
+        [
+            [int(round(x * orig_w / disp_w)), int(round(y * orig_h / disp_h))]
+            for x, y in polygon
+        ]
+        for polygon in polygons
+        if polygon  # Skip empty polygons
+    ]
+
+
+def save_polygons_to_json(polygons, target_path, original_size=None, display_size=None):
+    """
+    Write the drawn polygons to a JSON file.
+
+    If the last polygon in the list is empty it is left out of the output.
+    When both ``original_size`` and ``display_size`` are given, the points
+    are first mapped from display coordinates back to original coordinates
+    via ``convert_coordinates_to_original``.
+
+    Args:
+        polygons: List of polygons; indexed with ``[-1]``, so it must
+            contain at least one entry
+        target_path: Path of the JSON file to write (opened in "w" mode,
+            so an existing file is overwritten)
+        original_size: Optional (width, height) of the original source
+        display_size: Optional (width, height) of the displayed image
+    """
+    # Drop the trailing polygon when it is empty.
     data_to_save = polygons if polygons[-1] else polygons[:-1]
+
+    # Convert coordinates back to original size if needed
+    if original_size and display_size:
+        data_to_save = convert_coordinates_to_original(
+            data_to_save, original_size, display_size
+        )
+
     with open(target_path, "w") as f:
         json.dump(data_to_save, f)
 
@@ -132,6 +205,18 @@ def main(source_path: str, zone_configuration_path: str) -> None:
         print("Failed to load source image.")
         return
 
+    # Get original video dimensions
+    cap = cv2.VideoCapture(source_path)
+    original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    cap.release()
+
+    # Get display dimensions
+    display_height, display_width = original_image.shape[:2]
+
+    print(f"Original video size: {original_width}x{original_height}")
+    print(f"Display size: {display_width}x{display_height}")
+
     image = original_image.copy()
     cv2.imshow(WINDOW_NAME, image)
     cv2.setMouseCallback(WINDOW_NAME, mouse_event, image)
@@ -144,8 +229,14 @@ def main(source_path: str, zone_configuration_path: str) -> None:
             POLYGONS[-1] = []
             current_mouse_position = None
         elif key == KEY_SAVE:
-            save_polygons_to_json(POLYGONS, zone_configuration_path)
+            save_polygons_to_json(
+                POLYGONS,
+                zone_configuration_path,
+                (original_width, original_height),
+                (display_width, display_height),
+            )
             print(f"Polygons saved to {zone_configuration_path}")
+            print("Coordinates converted to original video size.")
             break
         redraw(image, original_image)
         if key == KEY_QUIT:

@@ -23,6 +23,7 @@ def main(
     confidence: float,
     iou: float,
     classes: list[int],
+    output_video_path: str = None,
 ) -> None:
     model = YOLO(weights)
     tracker = sv.ByteTrack(minimum_matching_threshold=0.5)
@@ -39,6 +40,32 @@ def main(
     ]
     timers = [FPSBasedTimer(video_info.fps) for _ in zones]
 
+    # Video writer setup
+    video_writer = None
+    if output_video_path:
+        # Use Twitter-compatible codec - try H264 first, then XVID
+        try:
+            fourcc = cv2.VideoWriter_fourcc(*"H264")
+            video_writer = cv2.VideoWriter(
+                output_video_path,
+                fourcc,
+                video_info.fps,
+                (video_info.width, video_info.height),
+            )
+            print(f"Video output being saved: {output_video_path}")
+            print("Using Twitter-compatible H.264 codec")
+        except:
+            # Fall back to XVID if H264 is not supported
+            fourcc = cv2.VideoWriter_fourcc(*"XVID")
+            video_writer = cv2.VideoWriter(
+                output_video_path,
+                fourcc,
+                video_info.fps,
+                (video_info.width, video_info.height),
+            )
+            print(f"Video output being saved: {output_video_path}")
+            print("Using XVID codec (convert with FFmpeg for Twitter)")
+
     for frame in frames_generator:
         results = model(frame, verbose=False, device=device, conf=confidence)[0]
         detections = sv.Detections.from_ultralytics(results)
@@ -74,8 +101,19 @@ def main(
             )
 
         cv2.imshow("Processed Video", annotated_frame)
+
+        # Save frame to output video if writer is available
+        if video_writer is not None:
+            video_writer.write(annotated_frame)
+
         if cv2.waitKey(1) & 0xFF == ord("q"):
             break
+
+    # Cleanup
+    if video_writer is not None:
+        video_writer.release()
+        print(f"Video successfully saved: {output_video_path}")
+
     cv2.destroyAllWindows()
 
 
@@ -126,6 +164,12 @@ def main(
         default=[],
         help="List of class IDs to track. If empty, all classes are tracked.",
     )
+    parser.add_argument(
+        "--output_video_path",
+        type=str,
+        default=None,
+        help="Path to save the output video. If not provided, video will only be displayed.",
+    )
     args = parser.parse_args()
 
     main(
@@ -136,4 +180,5 @@ def main(
         confidence=args.confidence_threshold,
         iou=args.iou_threshold,
         classes=args.classes,
+        output_video_path=args.output_video_path,
     )