Skip to content

Commit 28b4422

Browse files
authored
Merge pull request #1134 from onekey-sec/arpy-based-ar-extractor
Arpy based ar extractor
2 parents e7ba949 + f6f0ed0 commit 28b4422

File tree

33 files changed

+100
-7
lines changed

33 files changed

+100
-7
lines changed

python/unblob/file_utils.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import unicodedata
1313
from collections.abc import Iterable, Iterator
1414
from pathlib import Path
15-
from typing import Literal, Optional, Union
15+
from typing import Literal, Optional, Protocol, Union, overload
1616

1717
from dissect.cstruct import cstruct
1818
from structlog import get_logger
@@ -269,8 +269,18 @@ def iterate_patterns(
269269
file.seek(initial_position)
270270

271271

272+
class RandomReader(Protocol):
    """Structural type for seekable, readable byte sources.

    Any object exposing compatible ``read`` and ``seek`` methods satisfies
    this protocol; ``File`` implements this interface.
    """

    @overload
    def read(self) -> bytes:
        ...

    @overload
    def read(self, n: int, /) -> bytes:
        ...

    def seek(self, pos: int, /, whence: int = io.SEEK_SET) -> int:
        ...
280+
281+
272282
def iterate_file(
273-
file: File,
283+
file: RandomReader,
274284
start_offset: int,
275285
size: int,
276286
# default buffer size in shutil for unix based systems
@@ -297,7 +307,7 @@ def iterate_file(
297307
yield data
298308

299309

300-
def carve(carve_path: Path, file: File, start_offset: int, size: int):
310+
def carve(carve_path: Path, file: RandomReader, start_offset: int, size: int):
301311
"""Extract part of a file."""
302312
carve_path.parent.mkdir(parents=True, exist_ok=True)
303313

python/unblob/handlers/archive/ar.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import os
2+
from pathlib import Path
23
from typing import Optional
34

45
import arpy
56
from structlog import get_logger
67

7-
from ...extractors import Command
8-
from ...file_utils import OffsetFile
9-
from ...models import File, Handler, HexString, ValidChunk
8+
from ...file_utils import FileSystem, OffsetFile, iterate_file
9+
from ...models import Extractor, ExtractResult, File, Handler, HexString, ValidChunk
10+
from ...report import ExtractionProblem
1011

1112
logger = get_logger()
1213

@@ -15,6 +16,40 @@
1516
SIGNATURE_LENGTH = 0x8
1617

1718

19+
class ArExtractor(Extractor):
    """Extractor that unpacks ``ar`` archives with the ``arpy`` library."""

    def extract(self, inpath: Path, outdir: Path) -> Optional[ExtractResult]:
        """Write every member of the archive at *inpath* below *outdir*.

        Members are processed in sorted order of their raw (bytes) names.
        A name that cannot be decoded with the default codec is decoded
        again with ``errors="replace"`` and an ``ExtractionProblem`` is
        recorded on the output file system.

        Returns an ``ExtractResult`` carrying the problems collected by the
        ``FileSystem`` wrapper around *outdir*.
        """
        fs = FileSystem(outdir)

        with arpy.Archive(inpath.as_posix()) as archive:
            # Populate archive.archived_files before iterating over it.
            archive.read_all_headers()

            # Keys are unique, so sorting on the key alone yields the same
            # order as sorting the names themselves.
            members = sorted(archive.archived_files.items(), key=lambda kv: kv[0])

            for raw_name, member in members:
                try:
                    member_path = Path(raw_name.decode())
                except UnicodeDecodeError:
                    member_path = Path(raw_name.decode(errors="replace"))
                    problem = ExtractionProblem(
                        path=repr(raw_name),
                        problem="Path is not a valid UTF/8 string",
                        resolution=f"Converted to {member_path}",
                    )
                    fs.record_problem(problem)

                # Stream the member's bytes from its start for header.size bytes.
                content = iterate_file(member, 0, member.header.size)
                fs.write_chunks(member_path, chunks=content)

        return ExtractResult(reports=fs.problems)
51+
52+
1853
class ARHandler(Handler):
1954
NAME = "ar"
2055

@@ -27,7 +62,7 @@ class ARHandler(Handler):
2762
)
2863
]
2964

30-
EXTRACTOR = Command("unar", "-no-directory", "-o", "{outdir}", "{inpath}")
65+
EXTRACTOR = ArExtractor()
3166

3267
def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
3368
offset_file = OffsetFile(file, start_offset)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:7981c6fabc0cb521c443d3b2e8e54713cae8bbccf8208b1950a078367567c3bb
3+
size 194
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:1822aa7c0a030fa6ae9ea664b4ba2d804bb0bf5014cc7366d199374a1d64cc7c
3+
size 358
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:9b57b0fc4cff4d4c8c37dbbc1f2f51ed49859f90a931da3465bb27b7ea75a412
3+
size 552
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:1d74d5cbf166638434c696563f88b13db1438d34f8e58675ae6bcf337edbcf9c
3+
size 298
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:34c790979dc722273410bc18d2d83712f763e111aa252f4f248d7785a5b2014c
3+
size 160
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:f0a17a43c74d2fe5474fa2fd29c8f14799e777d7d75a2cc4d11c20a6e7b161c5
3+
size 8
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:a43964b9c61030b12b2320f7782e25f2bab411e0529b85b7e92da969ca1d92a9
3+
size 274
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:3aee5062fe584e07862f99d75a4a28613cff6b261d49eab336d59273268fa6d8
3+
size 372

0 commit comments

Comments
 (0)