@@ -79,9 +79,17 @@ static char *strip_components_path(const char *path, int strip);
7979static int apply_strip_components (struct archive_entry * e , int strip );
8080static int path_component_count (const char * path );
8181static char * normalize_rel_path (const char * path );
82- static int match_excluded_with_prefix (struct archive * match ,
83- struct archive_entry * e ,
84- const char * prefix );
82+ struct pattern_list {
83+ char * * items ;
84+ size_t len ;
85+ size_t cap ;
86+ };
87+ static void pattern_list_add (struct pattern_list * list , const char * pattern );
88+ static void pattern_list_free (struct pattern_list * list );
89+ static int should_extract_path (struct archive * matching , const char * path );
90+ static char * join_prefix_path (const char * prefix , const char * path );
91+ static int has_include_descendant (const struct pattern_list * includes ,
92+ const char * path );
8593
8694static int pkg_getopt (int * argc , char * * * argv , const char * * arg ) {
8795 enum { state_start = 0 , state_next_word , state_short , state_long };
@@ -263,7 +271,7 @@ static int astream_close_cb(struct archive *a, void *client_data) {
263271
264272static void extract_nested_archive_from_stream (struct astream * in ,
265273 const char * outdir , int flags ,
266- struct archive * match ,
274+ struct archive * matching ,
267275 int strip_components ,
268276 const char * prefix ) {
269277 struct archive * a = archive_read_new ();
@@ -308,14 +316,14 @@ static void extract_nested_archive_from_stream(struct astream *in,
308316 char * rel = normalize_rel_path (p );
309317 archive_entry_set_pathname (e , rel );
310318
311- if (match != NULL ) {
312- r = match_excluded_with_prefix (match , e , prefix );
313- if (r != 0 ) {
314- archive_read_data_skip (a );
315- free (rel );
316- continue ;
317- }
319+ char * logical_path = join_prefix_path (prefix , rel );
320+ if (!should_extract_path (matching , logical_path )) {
321+ archive_read_data_skip (a );
322+ free (logical_path );
323+ free (rel );
324+ continue ;
318325 }
326+ free (logical_path );
319327
320328 if (apply_strip_components (e , strip_components )) {
321329 archive_read_data_skip (a );
@@ -456,41 +464,79 @@ static int path_component_count(const char *path) {
456464 return (count );
457465}
458466
459- static int match_excluded_with_prefix (struct archive * match ,
460- struct archive_entry * e ,
461- const char * prefix ) {
462- if (match == NULL ) {
463- return (0 );
467+ static void pattern_list_add (struct pattern_list * list , const char * pattern ) {
468+ if (list -> len == list -> cap ) {
469+ size_t new_cap = list -> cap == 0 ? 8 : list -> cap * 2 ;
470+ char * * new_items = realloc (list -> items , new_cap * sizeof (* new_items ));
471+ if (new_items == NULL ) {
472+ fail_errno ("realloc" );
473+ }
474+ list -> items = new_items ;
475+ list -> cap = new_cap ;
464476 }
465- if (prefix == NULL || prefix [0 ] == '\0' ) {
466- return archive_match_excluded (match , e );
477+ char * dup = strdup (pattern );
478+ if (dup == NULL ) {
479+ fail_errno ("strdup" );
467480 }
468- const char * orig = archive_entry_pathname (e );
469- if (orig == NULL ) {
470- return archive_match_excluded (match , e );
481+ list -> items [list -> len ++ ] = dup ;
482+ }
483+
484+ static void pattern_list_free (struct pattern_list * list ) {
485+ for (size_t i = 0 ; i < list -> len ; i ++ ) {
486+ free (list -> items [i ]);
471487 }
472- char * orig_copy = strdup (orig );
473- if (orig_copy == NULL ) {
474- fail_errno ("strdup" );
488+ free (list -> items );
489+ list -> items = NULL ;
490+ list -> len = 0 ;
491+ list -> cap = 0 ;
492+ }
493+
494+ static int should_extract_path (struct archive * matching , const char * path ) {
495+ struct archive_entry * entry = archive_entry_new ();
496+ if (entry == NULL ) {
497+ fail_errno ("archive_entry_new" );
498+ }
499+ archive_entry_set_pathname (entry , path );
500+ int excluded = archive_match_excluded (matching , entry );
501+ archive_entry_free (entry );
502+ if (excluded < 0 ) {
503+ fail_archive (matching , "archive_match_excluded" );
504+ }
505+ return (excluded == 0 );
506+ }
507+
508+ static int has_include_descendant (const struct pattern_list * includes ,
509+ const char * path ) {
510+ size_t plen = strlen (path );
511+ for (size_t i = 0 ; i < includes -> len ; i ++ ) {
512+ const char * pat = includes -> items [i ];
513+ if (strncmp (pat , path , plen ) == 0 && pat [plen ] == '/' ) {
514+ return (1 );
515+ }
516+ }
517+ return (0 );
518+ }
519+
520+ static char * join_prefix_path (const char * prefix , const char * path ) {
521+ if (prefix == NULL || prefix [0 ] == '\0' ||
522+ (prefix [0 ] == '.' && prefix [1 ] == '\0' )) {
523+ char * dup = strdup (path );
524+ if (dup == NULL ) {
525+ fail_errno ("strdup" );
526+ }
527+ return (dup );
475528 }
476529 size_t plen = strlen (prefix );
477- size_t olen = strlen (orig_copy );
478- size_t total = plen + 1 + olen + 1 ;
530+ size_t path_len = strlen (path );
531+ size_t total = plen + 1 + path_len + 1 ;
479532 char * buf = malloc (total );
480533 if (buf == NULL ) {
481- free (orig_copy );
482534 fail_errno ("malloc" );
483535 }
484536 memcpy (buf , prefix , plen );
485537 buf [plen ] = '/' ;
486- memcpy (buf + plen + 1 , orig_copy , olen + 1 );
487-
488- archive_entry_set_pathname (e , buf );
489- int r = archive_match_excluded (match , e );
490- archive_entry_set_pathname (e , orig_copy );
491- free (buf );
492- free (orig_copy );
493- return (r );
538+ memcpy (buf + plen + 1 , path , path_len + 1 );
539+ return (buf );
494540}
495541
496542static int contains_dotdot_segment (const char * path ) {
@@ -582,7 +628,8 @@ int main(int argc, char **argv) {
582628 const char * xar_path = NULL ;
583629 const char * outdir = NULL ;
584630 struct archive * xar ;
585- struct archive * match = NULL ;
631+ struct archive * matching ;
632+ struct pattern_list includes = {0 };
586633 struct archive * disk ;
587634 struct archive_entry * e ;
588635 int r ;
@@ -594,6 +641,11 @@ int main(int argc, char **argv) {
594641 int strip_components = 0 ;
595642 int flags ;
596643
644+ matching = archive_match_new ();
645+ if (matching == NULL ) {
646+ fail_errno ("archive_match_new" );
647+ }
648+
597649 while ((opt = pkg_getopt (& argc , & argv , & arg )) != -1 ) {
598650 switch (opt ) {
599651 case 'f' :
@@ -611,27 +663,14 @@ int main(int argc, char **argv) {
611663 do_expand_full = 1 ;
612664 break ;
613665 case opt_include :
614- if (match == NULL ) {
615- match = archive_match_new ();
616- if (match == NULL ) {
617- fail_errno ("archive_match_new" );
618- }
619- }
620- r = archive_match_include_pattern (match , arg );
621- if (r != ARCHIVE_OK ) {
622- fail_archive (match , "include pattern" );
666+ pattern_list_add (& includes , arg );
667+ if (archive_match_include_pattern (matching , arg ) != ARCHIVE_OK ) {
668+ fail_archive (matching , "archive_match_include_pattern" );
623669 }
624670 break ;
625671 case opt_exclude :
626- if (match == NULL ) {
627- match = archive_match_new ();
628- if (match == NULL ) {
629- fail_errno ("archive_match_new" );
630- }
631- }
632- r = archive_match_exclude_pattern (match , arg );
633- if (r != ARCHIVE_OK ) {
634- fail_archive (match , "exclude pattern" );
672+ if (archive_match_exclude_pattern (matching , arg ) != ARCHIVE_OK ) {
673+ fail_archive (matching , "archive_match_exclude_pattern" );
635674 }
636675 break ;
637676 case opt_strip_components :
@@ -667,10 +706,6 @@ int main(int argc, char **argv) {
667706 fail_errno ("archive_read_new" );
668707 }
669708
670- if (match != NULL ) {
671- archive_match_set_inclusion_recursion (match , 1 );
672- }
673-
674709 disk = archive_write_disk_new ();
675710 if (disk == NULL ) {
676711 fail_errno ("archive_write_disk_new" );
@@ -707,20 +742,29 @@ int main(int argc, char **argv) {
707742 fail_errno ("chdir(outdir)" );
708743 }
709744
745+ if (archive_match_set_inclusion_recursion (matching , 1 ) != ARCHIVE_OK ) {
746+ fail_archive (matching , "archive_match_set_inclusion_recursion" );
747+ }
748+
710749 while ((r = archive_read_next_header (xar , & e )) == ARCHIVE_OK ) {
711750 const char * p = archive_entry_pathname (e );
712751 char * rel = normalize_rel_path (p );
713752 archive_entry_set_pathname (e , rel );
714- if (match != NULL ) {
715- r = archive_match_excluded (match , e );
716- if (r != 0 ) {
753+ int is_nested = should_be_treated_as_nested_archive (rel );
754+ if (do_expand_full && is_nested ) {
755+ char * logical_path = join_prefix_path (NULL , rel );
756+ int include_nested = should_extract_path (matching , logical_path );
757+ if (!include_nested && includes .len > 0 &&
758+ has_include_descendant (& includes , logical_path )) {
759+ include_nested = 1 ;
760+ }
761+ free (logical_path );
762+ if (!include_nested ) {
717763 archive_read_data_skip (xar );
718764 free (rel );
719765 continue ;
720766 }
721- }
722- int is_nested = should_be_treated_as_nested_archive (rel );
723- if (do_expand_full && is_nested ) {
767+
724768 char * nested_outdir = strip_components_path (rel , strip_components );
725769 int nested_strip = strip_components ;
726770 int rel_components = path_component_count (rel );
@@ -749,12 +793,20 @@ int main(int argc, char **argv) {
749793 .eof = 0 ,
750794 };
751795
752- extract_nested_archive_from_stream (& in , nested_outdir , flags , match ,
796+ extract_nested_archive_from_stream (& in , nested_outdir , flags , matching ,
753797 nested_strip , rel );
754798 }
755799 free (nested_outdir );
756800 free (rel );
757801 } else {
802+ char * logical_path = join_prefix_path (NULL , rel );
803+ if (!should_extract_path (matching , logical_path )) {
804+ archive_read_data_skip (xar );
805+ free (logical_path );
806+ free (rel );
807+ continue ;
808+ }
809+ free (logical_path );
758810 if (apply_strip_components (e , strip_components )) {
759811 archive_read_data_skip (xar );
760812 free (rel );
@@ -771,8 +823,7 @@ int main(int argc, char **argv) {
771823
772824 archive_write_free (disk );
773825 archive_read_free (xar );
774- if (match != NULL ) {
775- archive_match_free (match );
776- }
826+ archive_match_free (matching );
827+ pattern_list_free (& includes );
777828 return (0 );
778829}
0 commit comments