Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions exiftool/exiftool.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
# ---------- standard Python imports ----------
import select
import subprocess
import time
import os
import shutil
from pathlib import Path # requires Python 3.4+
Expand All @@ -45,7 +46,6 @@
import ctypes



# ---------- Typing Imports ----------
# for static analysis / type checking - Python 3.5+
from collections.abc import Callable
Expand Down Expand Up @@ -107,7 +107,7 @@ def _get_buffer_end(buffer_list: List[bytes], bytes_needed: int) -> bytes:
return buf_tail_joined


def _read_fd_endswith(fd, b_endswith: bytes, block_size: int) -> bytes:
def _read_fd_endswith(fd, b_endswith: bytes, block_size: int, timeout: Optional[int] = 10) -> bytes:
""" read an fd and keep reading until it endswith the seq_ends

this allows a consolidated read function that is platform independent
Expand All @@ -120,6 +120,9 @@ def _read_fd_endswith(fd, b_endswith: bytes, block_size: int) -> bytes:
# this value can be bigger to capture more bytes at the "tail" of the read, but if it's too small, trailing whitespace can push the end sequence out of the tail and the detection will miss it
endswith_count = len(b_endswith) + 4

# counting attempts to prevent infinite loops
_attempts_counter = 0

# I believe doing a slice, then a strip, is more efficient in memory, hence the original code did it this way.
# need to benchmark to see if in large strings, strip()[-endswithcount:] is more expensive or not
while not _get_buffer_end(output_list, endswith_count).strip().endswith(b_endswith):
Expand All @@ -129,10 +132,17 @@ def _read_fd_endswith(fd, b_endswith: bytes, block_size: int) -> bytes:
output_list.append(os.read(fd, block_size))
else: # pytest-cov:windows: no cover
# this does NOT work on windows... and it may not work on other systems... in that case, put more things to use the original code above
inputready, outputready, exceptready = select.select([fd], [], [])
inputready, _, _ = select.select([fd], [], [], 1.0)
for i in inputready:
if i == fd:
output_list.append(os.read(fd, block_size))
else:
# nothing to read, wait a bit to retry
time.sleep(0.1)
_attempts_counter += 1
if timeout is not None and _attempts_counter >= timeout:
warnings.warn("Timeout to read from output, verify stdeer.")
raise TimeoutError("Timeout waiting for ExifTool output")

return b"".join(output_list)

Expand Down Expand Up @@ -269,7 +279,6 @@ def __init__(self,
self._executable: Union[str, Path] = constants.DEFAULT_EXECUTABLE # executable absolute path (default set to just the executable name, so it can't be None)
self._config_file: Optional[str] = None # config file that can only be set when exiftool is not running
self._common_args: Optional[List[str]] = None
self._logger = None
self._encoding: Optional[str] = None
self._json_loads: Callable = json.loads # variable points to the actual callable method
self._json_loads_kwargs: dict = {} # default optional params to pass into json.loads() call
Expand Down Expand Up @@ -1064,13 +1073,22 @@ def execute(self, *params: Union[str, bytes], raw_bytes: bool = False) -> Union[
#
# The data that comes back from exiftool falls into this, and so unbuffered reads are done with os.read()

raw_stdout = b''
raw_stderr = b''

fdout = self._process.stdout.fileno()
raw_stdout = _read_fd_endswith(fdout, seq_ready.encode(self._encoding), self._block_size)
try:
raw_stdout = _read_fd_endswith(fdout, seq_ready.encode(self._encoding), self._block_size)
except TimeoutError:
# in case of timeout, we still want to try to read stderr out
raw_stdout = b"TimeoutError to read stdout"

# when it's ready, we can safely read all of stderr out, as the command is already done
fderr = self._process.stderr.fileno()
raw_stderr = _read_fd_endswith(fderr, seq_err_post.encode(self._encoding), self._block_size)

try:
raw_stderr = _read_fd_endswith(fderr, seq_err_post.encode(self._encoding), self._block_size)
except TimeoutError:
raw_stderr = b"TimeoutError to read stderr"

if not raw_bytes:
# decode if not returning bytes
Expand Down