Skip to content

Commit 1209fe6

Browse files
authored
Handle include and exclude the same way bsdtar does (#4)
* Include and exclude paths should apply after strip
* Revert "Include and exclude paths should apply after strip" (this reverts commit 9d033af)
* wip
* wip
1 parent 41f1ef3 commit 1209fe6

2 files changed

Lines changed: 173 additions & 68 deletions

File tree

pkgutil.c

Lines changed: 119 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,17 @@ static char *strip_components_path(const char *path, int strip);
7979
static int apply_strip_components(struct archive_entry *e, int strip);
8080
static int path_component_count(const char *path);
8181
static char *normalize_rel_path(const char *path);
82-
static int match_excluded_with_prefix(struct archive *match,
83-
struct archive_entry *e,
84-
const char *prefix);
82+
/*
 * Growable array of pattern strings. Entries are appended by
 * pattern_list_add() and released by pattern_list_free().
 */
struct pattern_list {
83+
char **items; /* heap array of strdup'ed patterns; slots [0, len) are in use */
84+
size_t len; /* number of patterns currently stored */
85+
size_t cap; /* number of slots allocated in items */
86+
};
87+
static void pattern_list_add(struct pattern_list *list, const char *pattern);
88+
static void pattern_list_free(struct pattern_list *list);
89+
static int should_extract_path(struct archive *matching, const char *path);
90+
static char *join_prefix_path(const char *prefix, const char *path);
91+
static int has_include_descendant(const struct pattern_list *includes,
92+
const char *path);
8593

8694
static int pkg_getopt(int *argc, char ***argv, const char **arg) {
8795
enum { state_start = 0, state_next_word, state_short, state_long };
@@ -263,7 +271,7 @@ static int astream_close_cb(struct archive *a, void *client_data) {
263271

264272
static void extract_nested_archive_from_stream(struct astream *in,
265273
const char *outdir, int flags,
266-
struct archive *match,
274+
struct archive *matching,
267275
int strip_components,
268276
const char *prefix) {
269277
struct archive *a = archive_read_new();
@@ -308,14 +316,14 @@ static void extract_nested_archive_from_stream(struct astream *in,
308316
char *rel = normalize_rel_path(p);
309317
archive_entry_set_pathname(e, rel);
310318

311-
if (match != NULL) {
312-
r = match_excluded_with_prefix(match, e, prefix);
313-
if (r != 0) {
314-
archive_read_data_skip(a);
315-
free(rel);
316-
continue;
317-
}
319+
char *logical_path = join_prefix_path(prefix, rel);
320+
if (!should_extract_path(matching, logical_path)) {
321+
archive_read_data_skip(a);
322+
free(logical_path);
323+
free(rel);
324+
continue;
318325
}
326+
free(logical_path);
319327

320328
if (apply_strip_components(e, strip_components)) {
321329
archive_read_data_skip(a);
@@ -456,41 +464,79 @@ static int path_component_count(const char *path) {
456464
return (count);
457465
}
458466

459-
static int match_excluded_with_prefix(struct archive *match,
460-
struct archive_entry *e,
461-
const char *prefix) {
462-
if (match == NULL) {
463-
return (0);
467+
/*
 * Append a private copy of `pattern` to `list`.
 *
 * When the array is full it is grown with realloc(): to 8 slots on first
 * use, then doubled. The stored string is a strdup() copy, so the caller
 * keeps ownership of `pattern`.
 *
 * Allocation failures are reported through fail_errno(); the statements
 * after each check assume that call does not return -- TODO confirm.
 */
static void pattern_list_add(struct pattern_list *list, const char *pattern) {
468+
if (list->len == list->cap) {
469+
size_t new_cap = list->cap == 0 ? 8 : list->cap * 2; /* NOTE(review): doubling is not overflow-checked */
470+
char **new_items = realloc(list->items, new_cap * sizeof(*new_items)); /* result kept in a temporary so list->items is untouched on failure */
471+
if (new_items == NULL) {
472+
fail_errno("realloc");
473+
}
474+
list->items = new_items;
475+
list->cap = new_cap;
464476
}
465-
if (prefix == NULL || prefix[0] == '\0') {
466-
return archive_match_excluded(match, e);
477+
char *dup = strdup(pattern);
478+
if (dup == NULL) {
479+
fail_errno("strdup");
467480
}
468-
const char *orig = archive_entry_pathname(e);
469-
if (orig == NULL) {
470-
return archive_match_excluded(match, e);
481+
list->items[list->len++] = dup; /* the list now owns dup */
482+
}
483+
484+
/*
 * Release every string stored in `list` and the array itself, then reset
 * the list to the empty state (items == NULL, len == 0, cap == 0).
 * The struct pattern_list object itself is not freed.
 */
static void pattern_list_free(struct pattern_list *list) {
485+
for (size_t i = 0; i < list->len; i++) {
486+
free(list->items[i]);
471487
}
472-
char *orig_copy = strdup(orig);
473-
if (orig_copy == NULL) {
474-
fail_errno("strdup");
488+
free(list->items);
489+
list->items = NULL; /* fields are zeroed so no freed pointer is left behind */
490+
list->len = 0;
491+
list->cap = 0;
492+
}
493+
494+
/*
 * Ask the matcher `matching` whether `path` should be extracted.
 *
 * A throw-away archive_entry carrying only `path` is built, handed to
 * archive_match_excluded(), and freed again before the result is examined.
 *
 * Returns 1 when the matcher reports 0 (not excluded), 0 otherwise.
 * A negative matcher result is reported through fail_archive(); failure to
 * allocate the temporary entry is reported through fail_errno().
 */
static int should_extract_path(struct archive *matching, const char *path) {
  struct archive_entry *probe = archive_entry_new();
  if (!probe) {
    fail_errno("archive_entry_new");
  }

  archive_entry_set_pathname(probe, path);
  int verdict = archive_match_excluded(matching, probe);
  archive_entry_free(probe);

  if (verdict < 0) {
    fail_archive(matching, "archive_match_excluded");
  }
  return (verdict != 0 ? 0 : 1);
}
507+
508+
static int has_include_descendant(const struct pattern_list *includes,
509+
const char *path) {
510+
size_t plen = strlen(path);
511+
for (size_t i = 0; i < includes->len; i++) {
512+
const char *pat = includes->items[i];
513+
if (strncmp(pat, path, plen) == 0 && pat[plen] == '/') {
514+
return (1);
515+
}
516+
}
517+
return (0);
518+
}
519+
520+
/*
 * Build "<prefix>/<path>" in a newly allocated string.
 *
 * When `prefix` is NULL, empty, or exactly ".", the result is a plain
 * strdup() copy of `path`. `path` itself must not be NULL (it is passed to
 * strdup()/strlen() unchecked).
 *
 * Returns a heap string the caller must free(). Allocation failures are
 * reported through fail_errno(); the code after each check assumes that
 * call does not return -- TODO confirm.
 */
static char *join_prefix_path(const char *prefix, const char *path) {
521+
if (prefix == NULL || prefix[0] == '\0' ||
522+
(prefix[0] == '.' && prefix[1] == '\0')) {
523+
char *dup = strdup(path);
524+
if (dup == NULL) {
525+
fail_errno("strdup");
526+
}
527+
return (dup);
475528
}
476529
size_t plen = strlen(prefix);
477-
size_t olen = strlen(orig_copy);
478-
size_t total = plen + 1 + olen + 1;
530+
size_t path_len = strlen(path);
531+
size_t total = plen + 1 + path_len + 1; /* prefix + '/' + path + terminating NUL */
479532
char *buf = malloc(total);
480533
if (buf == NULL) {
481-
free(orig_copy);
482534
fail_errno("malloc");
483535
}
484536
memcpy(buf, prefix, plen);
485537
buf[plen] = '/';
486-
memcpy(buf + plen + 1, orig_copy, olen + 1);
487-
488-
archive_entry_set_pathname(e, buf);
489-
int r = archive_match_excluded(match, e);
490-
archive_entry_set_pathname(e, orig_copy);
491-
free(buf);
492-
free(orig_copy);
493-
return (r);
538+
memcpy(buf + plen + 1, path, path_len + 1); /* path_len + 1 also copies path's NUL */
539+
return (buf);
494540
}
495541

496542
static int contains_dotdot_segment(const char *path) {
@@ -582,7 +628,8 @@ int main(int argc, char **argv) {
582628
const char *xar_path = NULL;
583629
const char *outdir = NULL;
584630
struct archive *xar;
585-
struct archive *match = NULL;
631+
struct archive *matching;
632+
struct pattern_list includes = {0};
586633
struct archive *disk;
587634
struct archive_entry *e;
588635
int r;
@@ -594,6 +641,11 @@ int main(int argc, char **argv) {
594641
int strip_components = 0;
595642
int flags;
596643

644+
matching = archive_match_new();
645+
if (matching == NULL) {
646+
fail_errno("archive_match_new");
647+
}
648+
597649
while ((opt = pkg_getopt(&argc, &argv, &arg)) != -1) {
598650
switch (opt) {
599651
case 'f':
@@ -611,27 +663,14 @@ int main(int argc, char **argv) {
611663
do_expand_full = 1;
612664
break;
613665
case opt_include:
614-
if (match == NULL) {
615-
match = archive_match_new();
616-
if (match == NULL) {
617-
fail_errno("archive_match_new");
618-
}
619-
}
620-
r = archive_match_include_pattern(match, arg);
621-
if (r != ARCHIVE_OK) {
622-
fail_archive(match, "include pattern");
666+
pattern_list_add(&includes, arg);
667+
if (archive_match_include_pattern(matching, arg) != ARCHIVE_OK) {
668+
fail_archive(matching, "archive_match_include_pattern");
623669
}
624670
break;
625671
case opt_exclude:
626-
if (match == NULL) {
627-
match = archive_match_new();
628-
if (match == NULL) {
629-
fail_errno("archive_match_new");
630-
}
631-
}
632-
r = archive_match_exclude_pattern(match, arg);
633-
if (r != ARCHIVE_OK) {
634-
fail_archive(match, "exclude pattern");
672+
if (archive_match_exclude_pattern(matching, arg) != ARCHIVE_OK) {
673+
fail_archive(matching, "archive_match_exclude_pattern");
635674
}
636675
break;
637676
case opt_strip_components:
@@ -667,10 +706,6 @@ int main(int argc, char **argv) {
667706
fail_errno("archive_read_new");
668707
}
669708

670-
if (match != NULL) {
671-
archive_match_set_inclusion_recursion(match, 1);
672-
}
673-
674709
disk = archive_write_disk_new();
675710
if (disk == NULL) {
676711
fail_errno("archive_write_disk_new");
@@ -707,20 +742,29 @@ int main(int argc, char **argv) {
707742
fail_errno("chdir(outdir)");
708743
}
709744

745+
if (archive_match_set_inclusion_recursion(matching, 1) != ARCHIVE_OK) {
746+
fail_archive(matching, "archive_match_set_inclusion_recursion");
747+
}
748+
710749
while ((r = archive_read_next_header(xar, &e)) == ARCHIVE_OK) {
711750
const char *p = archive_entry_pathname(e);
712751
char *rel = normalize_rel_path(p);
713752
archive_entry_set_pathname(e, rel);
714-
if (match != NULL) {
715-
r = archive_match_excluded(match, e);
716-
if (r != 0) {
753+
int is_nested = should_be_treated_as_nested_archive(rel);
754+
if (do_expand_full && is_nested) {
755+
char *logical_path = join_prefix_path(NULL, rel);
756+
int include_nested = should_extract_path(matching, logical_path);
757+
if (!include_nested && includes.len > 0 &&
758+
has_include_descendant(&includes, logical_path)) {
759+
include_nested = 1;
760+
}
761+
free(logical_path);
762+
if (!include_nested) {
717763
archive_read_data_skip(xar);
718764
free(rel);
719765
continue;
720766
}
721-
}
722-
int is_nested = should_be_treated_as_nested_archive(rel);
723-
if (do_expand_full && is_nested) {
767+
724768
char *nested_outdir = strip_components_path(rel, strip_components);
725769
int nested_strip = strip_components;
726770
int rel_components = path_component_count(rel);
@@ -749,12 +793,20 @@ int main(int argc, char **argv) {
749793
.eof = 0,
750794
};
751795

752-
extract_nested_archive_from_stream(&in, nested_outdir, flags, match,
796+
extract_nested_archive_from_stream(&in, nested_outdir, flags, matching,
753797
nested_strip, rel);
754798
}
755799
free(nested_outdir);
756800
free(rel);
757801
} else {
802+
char *logical_path = join_prefix_path(NULL, rel);
803+
if (!should_extract_path(matching, logical_path)) {
804+
archive_read_data_skip(xar);
805+
free(logical_path);
806+
free(rel);
807+
continue;
808+
}
809+
free(logical_path);
758810
if (apply_strip_components(e, strip_components)) {
759811
archive_read_data_skip(xar);
760812
free(rel);
@@ -771,8 +823,7 @@ int main(int argc, char **argv) {
771823

772824
archive_write_free(disk);
773825
archive_read_free(xar);
774-
if (match != NULL) {
775-
archive_match_free(match);
776-
}
826+
archive_match_free(matching);
827+
pattern_list_free(&includes);
777828
return (0);
778829
}

tests/BUILD.bazel

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,32 @@ run_binary(
6464
tags = ["manual"],
6565
)
6666

67+
# Runs //:pkgutil on @component_pkg//file with --expand-full and
# --strip-components 6, combining two --include patterns (the SDK's usr/*
# and System/*) with two --exclude patterns (one Ruby header path and
# usr/share/*). Declares one output directory via out_dirs.
# NOTE(review): "$@" is presumably expanded to that output directory -- confirm.
run_binary(
68+
name = "pkgutil_component_expand_full_strip_6_usr_filter_action",
69+
tool = "//:pkgutil",
70+
srcs = [
71+
"@component_pkg//file",
72+
],
73+
args = [
74+
"--include",
75+
"Payload/Library/Developer/CommandLineTools/SDKs/MacOSX15.5.sdk/usr/*",
76+
"--include",
77+
"Payload/Library/Developer/CommandLineTools/SDKs/MacOSX15.5.sdk/System/*",
78+
"--exclude",
79+
"Payload/Library/Developer/CommandLineTools/SDKs/MacOSX15.5.sdk/System/Library/Frameworks/Ruby.framework/Versions/2.6/Headers/ruby/ruby",
80+
"--exclude",
81+
"Payload/Library/Developer/CommandLineTools/SDKs/MacOSX15.5.sdk/usr/share/*",
82+
"--expand-full",
83+
"--strip-components",
84+
"6",
85+
"$(location @component_pkg//file)",
86+
"$@",
87+
],
88+
out_dirs = ["pkgutil-component-expand-full-strip-6-usr-filter"],
89+
testonly = True,
90+
tags = ["manual"],
91+
)
92+
6793
run_binary(
6894
name = "pkgutil_product_expand_action",
6995
tool = "//:pkgutil",
@@ -169,6 +195,34 @@ exec_test(
169195
],
170196
)
171197

198+
# Runs :test with "-e" against three paths inside the filtered extraction
# output: usr, System, and usr/lib/libNFC_HAL.tbd.
# NOTE(review): "-e" presumably asserts that each path exists -- confirm
# against the :test target.
exec_test(
199+
native_test,
200+
name = "pkgutil_component_expand_full_strip_6_usr_filter_test",
201+
src = ":test",
202+
args = [
203+
"-e",
204+
"$(location :pkgutil_component_expand_full_strip_6_usr_filter_action)/usr",
205+
"$(location :pkgutil_component_expand_full_strip_6_usr_filter_action)/System",
206+
"$(location :pkgutil_component_expand_full_strip_6_usr_filter_action)/usr/lib/libNFC_HAL.tbd",
207+
],
208+
data = [
209+
":pkgutil_component_expand_full_strip_6_usr_filter_action",
210+
],
211+
)
212+
213+
# Runs :test with "-ne" against usr/share inside the filtered extraction
# output; usr/share/* is one of the action's --exclude patterns.
# NOTE(review): "-ne" presumably asserts that the path does NOT exist --
# confirm against the :test target.
exec_test(
214+
native_test,
215+
name = "pkgutil_component_expand_full_strip_6_usr_filter_missing_test",
216+
src = ":test",
217+
args = [
218+
"-ne",
219+
"$(location :pkgutil_component_expand_full_strip_6_usr_filter_action)/usr/share",
220+
],
221+
data = [
222+
":pkgutil_component_expand_full_strip_6_usr_filter_action",
223+
],
224+
)
225+
172226
exec_test(
173227
native_test,
174228
name = "pkgutil_product_expand_test",

0 commit comments

Comments
 (0)