Skip to content

Commit 684e4b4

Browse files
committed
[cli] Add minimum code to make CLI work with VFR videos with some commands (e.g. list-scenes) #168
Minimum scene length must still be zero for now.
1 parent 355fe1a commit 684e4b4

File tree

6 files changed

+126
-56
lines changed

6 files changed

+126
-56
lines changed

scenedetect/backends/moviepy.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from moviepy.video.io.ffmpeg_reader import FFMPEG_VideoReader
2525

2626
from scenedetect.backends.opencv import VideoStreamCv2
27-
from scenedetect.common import FrameTimecode
27+
from scenedetect.common import _USE_PTS_IN_DEVELOPMENT, FrameTimecode
2828
from scenedetect.platform import get_file_name
2929
from scenedetect.video_stream import SeekError, VideoOpenFailure, VideoStream
3030

@@ -173,6 +173,10 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]):
173173
ValueError: `target` is not a valid value (i.e. it is negative).
174174
"""
175175
success = False
176+
if _USE_PTS_IN_DEVELOPMENT:
177+
# TODO(https://scenedetect.com/issue/168): Need to handle PTS here.
178+
raise NotImplementedError()
179+
176180
if not isinstance(target, FrameTimecode):
177181
target = FrameTimecode(target, self.frame_rate)
178182
try:

scenedetect/backends/opencv.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,10 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]):
226226
if target < 0:
227227
raise ValueError("Target seek position cannot be negative!")
228228

229+
if _USE_PTS_IN_DEVELOPMENT:
230+
# TODO(https://scenedetect.com/issue/168): Shouldn't use frames for VFR video here.
231+
raise NotImplementedError()
232+
229233
# Have to seek one behind and call grab() after to that the VideoCapture
230234
# returns a valid timestamp when using CAP_PROP_POS_MSEC.
231235
target_frame_cv2 = (self.base_timecode + target).frame_num
@@ -429,8 +433,8 @@ def frame_size(self) -> ty.Tuple[int, int]:
429433
@property
430434
def duration(self) -> ty.Optional[FrameTimecode]:
431435
"""Duration of the stream as a FrameTimecode, or None if non terminating."""
432-
# TODO(v0.7): This will be incorrect for VFR. See if there is another property we can use
433-
# to estimate the video length correctly.
436+
# TODO(https://scenedetect.com/issue/168): This will be incorrect for VFR. See if there is
437+
# another property we can use to estimate the video length correctly.
434438
frame_count = math.trunc(self._cap.get(cv2.CAP_PROP_FRAME_COUNT))
435439
if frame_count > 0:
436440
return self.base_timecode + frame_count

scenedetect/backends/pyav.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,11 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]) -> None:
249249
if target < 0:
250250
raise ValueError("Target cannot be negative!")
251251
beginning = target == 0
252+
253+
if _USE_PTS_IN_DEVELOPMENT:
254+
# TODO(https://scenedetect.com/issue/168): Need to handle PTS here.
255+
raise NotImplementedError()
256+
252257
target = self.base_timecode + target
253258
if target >= 1:
254259
target = target - 1

scenedetect/common.py

Lines changed: 108 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@
7070

7171
import cv2
7272

73+
# TODO(https://scenedetect.com/issue/168): Ensure both CFR and VFR videos work as intended with this
74+
# flag enabled. When this feature is stable, we can then work on a roll-out plan.
7375
_USE_PTS_IN_DEVELOPMENT = False
7476

7577
##
@@ -176,12 +178,14 @@ def __init__(
176178
self._framerate = fps
177179
self._frame_num = None
178180
self._timecode: ty.Optional[Timecode] = None
181+
self._seconds: ty.Optional[float] = None
179182

180183
# Copy constructor.
181184
if isinstance(timecode, FrameTimecode):
182185
self._framerate = timecode._framerate if fps is None else fps
183186
self._frame_num = timecode._frame_num
184187
self._timecode = timecode._timecode
188+
self._seconds = timecode._seconds
185189
return
186190

187191
# Timecode.
@@ -205,21 +209,27 @@ def __init__(
205209
self._framerate = float(fps)
206210
# Process the timecode value, storing it as an exact number of frames.
207211
if isinstance(timecode, str):
208-
# TODO(v0.7): This will be incorrect for VFR videos. Need to represent this format
209-
# differently so we can support start/end times and min_scene_len correctly.
210-
self._frame_num = self._parse_timecode_string(timecode)
212+
self._seconds = self._timecode_to_seconds(timecode)
213+
self._frame_num = self._seconds_to_frames(self._timecode_to_seconds(timecode))
211214
else:
212215
self._frame_num = self._parse_timecode_number(timecode)
213216

214-
# TODO(v0.7): Add a PTS property as well and slowly transition over to that, since we don't
215-
# always know the position as a "frame number". However, for the reverse case, we CAN state
216-
# the presentation time if we know the frame number (for a fixed framerate video).
217217
@property
218218
def frame_num(self) -> ty.Optional[int]:
219+
if self._timecode:
220+
warnings.warn(
221+
message="TODO(https://scenedetect.com/issue/168): Update caller to handle VFR.",
222+
stacklevel=2,
223+
category=UserWarning,
224+
)
225+
# We can calculate the approx. # of frames by taking the presentation time and the
226+
# time base itself.
227+
(num, den) = (self._timecode.time_base * self._timecode.pts).as_integer_ratio()
228+
return num / den
219229
return self._frame_num
220230

221231
@property
222-
def framerate(self) -> ty.Optional[int]:
232+
def framerate(self) -> ty.Optional[float]:
223233
return self._framerate
224234

225235
def get_frames(self) -> int:
@@ -250,7 +260,7 @@ def get_framerate(self) -> float:
250260
)
251261
return self.framerate
252262

253-
# TODO(v0.7): Figure out how to deal with VFR here.
263+
# TODO(https://scenedetect.com/issue/168): Figure out how to deal with VFR here.
254264
def equal_framerate(self, fps) -> bool:
255265
"""Equal Framerate: Determines if the passed framerate is equal to that of this object.
256266
@@ -261,14 +271,17 @@ def equal_framerate(self, fps) -> bool:
261271
bool: True if passed fps matches the FrameTimecode object's framerate, False otherwise.
262272
263273
"""
264-
# TODO(v0.7): Support this comparison in the case FPS is not set but a timecode is.
274+
# TODO(https://scenedetect.com/issue/168): Support this comparison in the case FPS is not
275+
# set but a timecode is.
265276
return math.fabs(self.framerate - fps) < MAX_FPS_DELTA
266277

267278
@property
268279
def seconds(self) -> float:
269280
"""The frame's position in number of seconds."""
270281
if self._timecode:
271282
return self._timecode.seconds
283+
if self._seconds:
284+
return self._seconds
272285
# Assume constant framerate if we don't have timing information.
273286
return float(self._frame_num) / self._framerate
274287

@@ -355,14 +368,13 @@ def _parse_timecode_number(self, timecode: ty.Union[int, float]) -> int:
355368
else:
356369
raise TypeError("Timecode format/type unrecognized.")
357370

358-
def _parse_timecode_string(self, input: str) -> int:
359-
"""Parses a string based on the three possible forms (in timecode format,
360-
as an integer number of frames, or floating-point seconds, ending with 's').
361-
362-
Requires that the `framerate` property is set before calling this method.
363-
Assuming a framerate of 30.0 FPS, the strings '00:05:00.000', '00:05:00',
364-
'9000', '300s', and '300.0' are all possible valid values, all representing
365-
a period of time equal to 5 minutes, 300 seconds, or 9000 frames (at 30 FPS).
371+
def _timecode_to_seconds(self, input: str) -> float:
372+
"""Parses a string based on the three possible forms (in timecode format, as an integer
373+
number of frames, or floating-point seconds, ending with 's'). Exact frame numbers (int)
374+
requires the `framerate` property was set when the timecode was created. Assuming a
375+
framerate of 30.0 FPS, the strings '00:05:00.000', '00:05:00', '9000', '300s', and
376+
'300.0' are all possible valid values. These values represent periods of time equal to
377+
5 minutes, 300 seconds, or 9000 frames (at 30 FPS).
366378
367379
Raises:
368380
ValueError: Value could not be parsed correctly.
@@ -374,7 +386,7 @@ def _parse_timecode_string(self, input: str) -> int:
374386
timecode = int(input)
375387
if timecode < 0:
376388
raise ValueError("Timecode frame number must be positive.")
377-
return timecode
389+
return timecode * self.framerate
378390
# Timecode in string format 'HH:MM:SS[.nnn]' or 'MM:SS[.nnn]'
379391
elif input.find(":") >= 0:
380392
values = input.split(":")
@@ -392,7 +404,7 @@ def _parse_timecode_string(self, input: str) -> int:
392404
if not (hrs >= 0 and mins >= 0 and secs >= 0 and mins < 60 and secs < 60):
393405
raise ValueError("Invalid timecode range (values outside allowed range).")
394406
secs += (hrs * 60 * 60) + (mins * 60)
395-
return self._seconds_to_frames(secs)
407+
return secs
396408
# Try to parse the number as seconds in the format 1234.5 or 1234s
397409
if input.endswith("s"):
398410
input = input[:-1]
@@ -401,7 +413,7 @@ def _parse_timecode_string(self, input: str) -> int:
401413
as_float = float(input)
402414
if as_float < 0.0:
403415
raise ValueError("Timecode seconds value must be positive.")
404-
return self._seconds_to_frames(as_float)
416+
return as_float
405417

406418
def _get_other_as_frames(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> int:
407419
"""Get the frame number from `other` for arithmetic operations."""
@@ -410,7 +422,7 @@ def _get_other_as_frames(self, other: ty.Union[int, float, str, "FrameTimecode"]
410422
if isinstance(other, float):
411423
return self._seconds_to_frames(other)
412424
if isinstance(other, str):
413-
return self._parse_timecode_string(other)
425+
return self._seconds_to_frames(self._timecode_to_seconds(other))
414426
if isinstance(other, FrameTimecode):
415427
# If comparing two FrameTimecodes, they must have the same framerate for frame-based operations.
416428
if self._framerate and other._framerate and not self.equal_framerate(other._framerate):
@@ -421,53 +433,73 @@ def _get_other_as_frames(self, other: ty.Union[int, float, str, "FrameTimecode"]
421433
return other._frame_num
422434
# If other has no frame_num, it must have a timecode. Convert to frames.
423435
return self._seconds_to_frames(other.seconds)
424-
raise TypeError("Unsupported type for performing arithmetic with FrameTimecode.")
436+
raise TypeError("Cannot obtain frame number for this timecode.")
425437

426438
def __eq__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> bool:
427439
if other is None:
428440
return False
429-
if self._timecode:
441+
if self._timecode or self._seconds is not None:
430442
return self.seconds == self._get_other_as_seconds(other)
431443
return self.frame_num == self._get_other_as_frames(other)
432444

433445
def __ne__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> bool:
434446
if other is None:
435447
return True
436-
if self._timecode:
448+
if self._timecode or self._seconds is not None:
437449
return self.seconds != self._get_other_as_seconds(other)
438450
return self.frame_num != self._get_other_as_frames(other)
439451

440452
def __lt__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> bool:
441-
if self._timecode:
453+
if self._timecode or self._seconds is not None:
442454
return self.seconds < self._get_other_as_seconds(other)
443455
return self.frame_num < self._get_other_as_frames(other)
444456

445457
def __le__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> bool:
446-
if self._timecode:
458+
if self._timecode or self._seconds is not None:
447459
return self.seconds <= self._get_other_as_seconds(other)
448460
return self.frame_num <= self._get_other_as_frames(other)
449461

450462
def __gt__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> bool:
451-
if self._timecode:
463+
if self._timecode or self._seconds is not None:
452464
return self.seconds > self._get_other_as_seconds(other)
453465
return self.frame_num > self._get_other_as_frames(other)
454466

455467
def __ge__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> bool:
456-
if self._timecode:
468+
if self._timecode or self._seconds is not None:
457469
return self.seconds >= self._get_other_as_seconds(other)
458470
return self.frame_num >= self._get_other_as_frames(other)
459471

460472
def __iadd__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> "FrameTimecode":
461-
if self._timecode:
462-
new_seconds = self.seconds + self._get_other_as_seconds(other)
463-
# TODO: This is incorrect for VFR, need a better way to handle this.
464-
# For now, we convert back to a frame number.
465-
self._frame_num = self._seconds_to_frames(new_seconds)
466-
self._timecode = None
467-
else:
468-
self._frame_num += self._get_other_as_frames(other)
469-
if self._frame_num < 0: # Required to allow adding negative seconds/frames.
470-
self._frame_num = 0
473+
other_has_timecode = isinstance(other, FrameTimecode) and other._timecode
474+
475+
if self._timecode and other_has_timecode:
476+
if self._timecode.time_base != other._timecode.time_base:
477+
raise ValueError("timecodes have different time bases")
478+
self._timecode = Timecode(
479+
pts=max(0, self._timecode.pts + other._timecode.pts),
480+
time_base=self._timecode.time_base,
481+
)
482+
return self
483+
484+
# If either input is a timecode, the output shall also be one. The input which isn't a
485+
# timecode is converted into seconds, after which the equivalent timecode is computed.
486+
if self._timecode or other_has_timecode:
487+
timecode: Timecode = self._timecode if self._timecode else other._timecode
488+
seconds: float = self._get_other_as_seconds(other) if self._timecode else self.seconds
489+
self._timecode = Timecode(
490+
pts=max(0, timecode.pts + round(seconds / timecode.time_base)),
491+
time_base=timecode.time_base,
492+
)
493+
self._seconds = None
494+
self._framerate = None
495+
self._frame_num = None
496+
return self
497+
498+
if self._seconds and other._seconds:
499+
self._seconds = max(0, self._seconds + other._seconds)
500+
return self
501+
502+
self._frame_num = max(0, self._frame_num + self._get_other_as_frames(other))
471503
return self
472504

473505
def __add__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> "FrameTimecode":
@@ -476,16 +508,36 @@ def __add__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> "FrameTi
476508
return to_return
477509

478510
def __isub__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> "FrameTimecode":
479-
if self._timecode:
480-
new_seconds = self.seconds - self._get_other_as_seconds(other)
481-
# TODO: This is incorrect for VFR, need a better way to handle this.
482-
# For now, we convert back to a frame number.
483-
self._frame_num = self._seconds_to_frames(new_seconds)
484-
self._timecode = None
485-
else:
486-
self._frame_num -= self._get_other_as_frames(other)
487-
if self._frame_num < 0:
488-
self._frame_num = 0
511+
other_has_timecode = isinstance(other, FrameTimecode) and other._timecode
512+
513+
if self._timecode and other_has_timecode:
514+
if self._timecode.time_base != other._timecode.time_base:
515+
raise ValueError("timecodes have different time bases")
516+
self._timecode = Timecode(
517+
pts=max(0, self._timecode.pts - other._timecode.pts),
518+
time_base=self._timecode.time_base,
519+
)
520+
return self
521+
522+
# If either input is a timecode, the output shall also be one. The input which isn't a
523+
# timecode is converted into seconds, after which the equivalent timecode is computed.
524+
if self._timecode or other_has_timecode:
525+
timecode: Timecode = self._timecode if self._timecode else other._timecode
526+
seconds: float = self._get_other_as_seconds(other) if self._timecode else self.seconds
527+
self._timecode = Timecode(
528+
pts=max(0, timecode.pts - round(seconds / timecode.time_base)),
529+
time_base=timecode.time_base,
530+
)
531+
self._seconds = None
532+
self._framerate = None
533+
self._frame_num = None
534+
return self
535+
536+
if self._seconds and other._seconds:
537+
self._seconds = max(0, self._seconds - other._seconds)
538+
return self
539+
540+
self._frame_num = max(0, self._frame_num - self._get_other_as_frames(other))
489541
return self
490542

491543
def __sub__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> "FrameTimecode":
@@ -518,13 +570,18 @@ def __hash__(self) -> int:
518570
def _get_other_as_seconds(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> float:
519571
"""Get the time in seconds from `other` for arithmetic operations."""
520572
if isinstance(other, int):
573+
if self._timecode:
574+
# TODO(https://scenedetect.com/issue/168): We need to convert every place that uses
575+
# frame numbers with timestamps to convert to a non-frame based way of temporal
576+
# logic and instead use seconds-based.
577+
if _USE_PTS_IN_DEVELOPMENT and other == 1:
578+
return self.seconds
579+
raise NotImplementedError()
521580
return float(other) / self._framerate
522581
if isinstance(other, float):
523582
return other
524583
if isinstance(other, str):
525-
# This is not ideal, but we need a framerate to parse strings.
526-
# We create a temporary FrameTimecode to do this.
527-
return FrameTimecode(timecode=other, fps=self._framerate).seconds
584+
return self._timecode_to_seconds(other)
528585
if isinstance(other, FrameTimecode):
529586
return other.seconds
530587
raise TypeError("Unsupported type for performing arithmetic with FrameTimecode.")

scenedetect/detectors/content_detector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def __init__(
137137
raise ValueError("kernel_size must be odd integer >= 3")
138138
self._kernel = numpy.ones((kernel_size, kernel_size), numpy.uint8)
139139
self._frame_score: ty.Optional[float] = None
140-
# TODO(v0.7): Handle timecodes in filter.
140+
# TODO(https://scenedetect.com/issue/168): Handle timecodes in filter.
141141
self._flash_filter = FlashFilter(mode=filter_mode, length=min_scene_len)
142142

143143
def get_metrics(self):

scenedetect/detectors/threshold_detector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def process_frame(
111111
ty.List[int]: List of frames where scene cuts have been detected. There may be 0
112112
or more frames in the list, and not necessarily the same as frame_num.
113113
"""
114-
# TODO(v0.7): We need to consider PTS here instead. The methods below using frame numbers
114+
# TODO(https://scenedetect.com/issue/168): We need to consider PTS here instead. The methods below using frame numbers
115115
# won't work for variable framerates.
116116
frame_num = timecode.frame_num
117117

0 commit comments

Comments
 (0)