Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arpy based ar extractor #1134

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions python/unblob/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import unicodedata
from collections.abc import Iterable, Iterator
from pathlib import Path
from typing import Literal, Optional, Union
from typing import Literal, Optional, Protocol, Union

from dissect.cstruct import cstruct
from structlog import get_logger
Expand Down Expand Up @@ -269,8 +269,15 @@
file.seek(initial_position)


class RandomReader(Protocol):
    """Structural type for objects offering ``read`` and ``seek``.

    File implements this interface.
    """

    def read(self, n: Optional[int] = None) -> bytes:
        """Return bytes read from the underlying source."""

    def seek(self, pos: int, whence: int = io.SEEK_SET) -> int:
        """Move the read position; ``whence`` defaults to ``io.SEEK_SET``."""


def iterate_file(
file: File,
file: RandomReader,
start_offset: int,
size: int,
# default buffer size in shutil for unix based systems
Expand All @@ -297,7 +304,7 @@
yield data


def carve(carve_path: Path, file: File, start_offset: int, size: int):
def carve(carve_path: Path, file: RandomReader, start_offset: int, size: int):
"""Extract part of a file."""
carve_path.parent.mkdir(parents=True, exist_ok=True)

Expand Down
60 changes: 56 additions & 4 deletions python/unblob/handlers/archive/ar.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import io
import os
from pathlib import Path
from typing import Optional

import arpy
from structlog import get_logger

from ...extractors import Command
from ...file_utils import OffsetFile
from ...models import File, Handler, HexString, ValidChunk
from ...file_utils import FileSystem, OffsetFile, iterate_file
from ...models import Extractor, ExtractResult, File, Handler, HexString, ValidChunk
from ...report import ExtractionProblem

logger = get_logger()

Expand All @@ -15,6 +17,56 @@
SIGNATURE_LENGTH = 0x8


class RandomReader:
    """Adapter exposing an arpy archive member as a file_utils.RandomReader.

    arpy and unblob.File use different parameter names for read/seek, so the
    arguments are forwarded positionally to the wrapped object.
    """

    def __init__(self, arpy_file: arpy.ArchiveFileData):
        # The wrapped arpy member; every call is delegated to it.
        self._arpy_file = arpy_file

    def read(self, n: Optional[int] = None) -> bytes:
        """Delegate to the wrapped arpy file's ``read``."""
        data = self._arpy_file.read(n)
        return data

    def seek(self, pos: int, whence: int = io.SEEK_SET) -> int:
        """Delegate to the wrapped arpy file's ``seek``."""
        result = self._arpy_file.seek(pos, whence)
        return result


class ArExtractor(Extractor):
    """Extractor that unpacks ar archives with arpy."""

    def extract(self, inpath: Path, outdir: Path) -> Optional[ExtractResult]:
        """Write every member of the archive at ``inpath`` below ``outdir``.

        Member names that are not valid UTF-8 are decoded with replacement
        characters, and the substitution is recorded as an extraction problem.

        Returns an ExtractResult whose reports are the problems collected by
        the FileSystem while writing.
        """
        fs = FileSystem(outdir)

        with arpy.Archive(inpath.as_posix()) as archive:
            archive.read_all_headers()

            # Iterate names in sorted order so members are processed in a
            # stable sequence.
            for name in sorted(archive.archived_files):
                archived_file = archive.archived_files[name]

                try:
                    path = Path(name.decode())
                except UnicodeDecodeError:
                    # Fall back to a lossy decode, but make the change visible
                    # in the extraction report.
                    path = Path(name.decode(errors="replace"))
                    fs.record_problem(
                        ExtractionProblem(
                            path=repr(name),
                            problem="Path is not a valid UTF-8 string",
                            resolution=f"Converted to {path}",
                        )
                    )

                # Stream the member's content from offset 0 for the size
                # declared in its header.
                fs.write_chunks(
                    path,
                    chunks=iterate_file(
                        RandomReader(archived_file),
                        0,
                        archived_file.header.size,
                    ),
                )

        return ExtractResult(reports=fs.problems)


class ARHandler(Handler):
NAME = "ar"

Expand All @@ -27,7 +79,7 @@ class ARHandler(Handler):
)
]

EXTRACTOR = Command("unar", "-no-directory", "-o", "{outdir}", "{inpath}")
EXTRACTOR = ArExtractor()

def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
offset_file = OffsetFile(file, start_offset)
Expand Down
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/bsd_mixed.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/bsd_multi_names.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/bsd_single_name.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/contents.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/empty.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/gnu_mixed.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/gnu_multi_names.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/gnu_single_name.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/msvc_lib.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/normal.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/sym.ar
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/integration/archive/ar/__input__/windows.ar
Git LFS file not shown
Empty file.
Git LFS file not shown
Git LFS file not shown
Empty file.
Empty file.
Empty file.
Empty file.
Loading