@@ -133,6 +133,7 @@ pub enum Value {
133
133
Variant ( Cow < ' static , str > , Box < Value > ) ,
134
134
Seq ( Vec < Value > ) ,
135
135
Mapped ( Box < Value > , Box < Value > ) ,
136
+ Fallback ( bool , Box < Value > ) ,
136
137
Format ( Box < Format > ) ,
137
138
}
138
139
@@ -466,6 +467,8 @@ pub enum Format {
466
467
RepeatUntilLast ( Expr , Box < Format > ) ,
467
468
/// Repeat a format until a condition is satisfied by the sequence
468
469
RepeatUntilSeq ( Expr , Box < Format > ) ,
470
+ /// Repeat an eager narrow format, but continue with a broader item if necessary upon recoverable failure
471
+ RepeatFallback ( Box < Format > , Box < Format > ) ,
469
472
/// Parse a format without advancing the stream position afterwards
470
473
Peek ( Box < Format > ) ,
471
474
/// Attempt to parse a format and fail if it succeeds
@@ -660,6 +663,11 @@ impl FormatModule {
660
663
let t = self . infer_format_type ( scope, a) ?;
661
664
Ok ( ValueType :: Seq ( Box :: new ( t) ) )
662
665
}
666
+ Format :: RepeatFallback ( narrow, wide) => {
667
+ let mut t = self . infer_format_type ( scope, narrow) ?;
668
+ t = t. unify ( & self . infer_format_type ( scope, wide) ?) ?;
669
+ Ok ( ValueType :: Seq ( Box :: new ( t) ) )
670
+ }
663
671
Format :: Peek ( a) => self . infer_format_type ( scope, a) ,
664
672
Format :: PeekNot ( _a) => Ok ( ValueType :: Tuple ( vec ! [ ] ) ) ,
665
673
Format :: Slice ( _expr, a) => self . infer_format_type ( scope, a) ,
@@ -771,6 +779,7 @@ enum Decoder {
771
779
Record ( Vec < ( Cow < ' static , str > , Decoder ) > ) ,
772
780
While ( MatchTree , Box < Decoder > ) ,
773
781
Until ( MatchTree , Box < Decoder > ) ,
782
+ RepeatFallback ( MatchTree , Box < Decoder > , Box < Decoder > ) ,
774
783
RepeatCount ( Expr , Box < Decoder > ) ,
775
784
RepeatUntilLast ( Expr , Box < Decoder > ) ,
776
785
RepeatUntilSeq ( Expr , Box < Decoder > ) ,
@@ -1310,6 +1319,7 @@ impl Format {
1310
1319
Format :: RepeatCount ( expr, f) => f. match_bounds ( module) * expr. bounds ( ) ,
1311
1320
Format :: RepeatUntilLast ( _, f) => f. match_bounds ( module) * Bounds :: new ( 1 , None ) ,
1312
1321
Format :: RepeatUntilSeq ( _, _f) => Bounds :: new ( 0 , None ) ,
1322
+ Format :: RepeatFallback ( _narrow, _wide) => Bounds :: new ( 0 , None ) ,
1313
1323
Format :: Peek ( _) => Bounds :: exact ( 0 ) ,
1314
1324
Format :: PeekNot ( _) => Bounds :: exact ( 0 ) ,
1315
1325
Format :: Slice ( expr, _) => expr. bounds ( ) ,
@@ -1354,6 +1364,7 @@ impl Format {
1354
1364
Format :: Record ( fields) => fields. iter ( ) . any ( |( _, f) | f. depends_on_next ( module) ) ,
1355
1365
Format :: Repeat ( _) => true ,
1356
1366
Format :: Repeat1 ( _) => true ,
1367
+ Format :: RepeatFallback ( _narrow, _wide) => true ,
1357
1368
Format :: RepeatCount ( _, _f) => false ,
1358
1369
Format :: RepeatUntilLast ( _, _f) => false ,
1359
1370
Format :: RepeatUntilSeq ( _, _f) => false ,
@@ -1409,6 +1420,14 @@ impl Format {
1409
1420
}
1410
1421
}
1411
1422
1423
+ pub fn is_char_format ( & self , module : & FormatModule ) -> bool {
1424
+ match self {
1425
+ // NOTE - currently only true for named formats matching `/.*char.*/`
1426
+ Format :: ItemVar ( level, _args) => module. get_name ( * level) . contains ( "char" ) ,
1427
+ _ => false ,
1428
+ }
1429
+ }
1430
+
1412
1431
/// Returns `true` if values associated to this format should be handled as multi-character ASCII strings
1413
1432
pub fn is_ascii_string_format ( & self , module : & FormatModule ) -> bool {
1414
1433
match self {
@@ -1666,6 +1685,19 @@ impl<'a> MatchTreeStep<'a> {
1666
1685
Format :: RepeatUntilSeq ( _expr, _a) => {
1667
1686
Self :: accept ( ) // FIXME
1668
1687
}
1688
+ Format :: RepeatFallback ( narrow, wide) => {
1689
+ let tree = Self :: add_next ( module, next. clone ( ) ) ;
1690
+ tree. union ( Self :: add (
1691
+ module,
1692
+ narrow,
1693
+ Rc :: new ( Next :: Repeat ( narrow, next. clone ( ) ) ) ,
1694
+ ) )
1695
+ . union ( Self :: add (
1696
+ module,
1697
+ wide,
1698
+ Rc :: new ( Next :: Repeat ( wide, next. clone ( ) ) ) ,
1699
+ ) )
1700
+ }
1669
1701
Format :: Peek ( a) => {
1670
1702
let tree = Self :: add_next ( module, next. clone ( ) ) ;
1671
1703
let peek = Self :: add ( module, a, Rc :: new ( Next :: Empty ) ) ;
@@ -2334,6 +2366,33 @@ impl Decoder {
2334
2366
Err ( format ! ( "cannot build match tree for {:?}" , format) )
2335
2367
}
2336
2368
}
2369
+ Format :: RepeatFallback ( narrow, wide) => {
2370
+ if narrow. is_nullable ( compiler. module ) || wide. is_nullable ( compiler. module ) {
2371
+ return Err ( format ! (
2372
+ "Cannot repeat nullable format: Repeat({narrow:?} ⊂ {wide:?})"
2373
+ ) ) ;
2374
+ }
2375
+
2376
+ let dnarrow = Box :: new ( Decoder :: compile_next (
2377
+ compiler,
2378
+ narrow,
2379
+ Rc :: new ( Next :: Repeat ( narrow, next. clone ( ) ) ) ,
2380
+ ) ?) ;
2381
+
2382
+ let dwide = Box :: new ( Decoder :: compile_next (
2383
+ compiler,
2384
+ wide,
2385
+ Rc :: new ( Next :: Repeat ( wide, next. clone ( ) ) ) ,
2386
+ ) ?) ;
2387
+
2388
+ if let Some ( tree) =
2389
+ MatchTree :: build ( compiler. module , std:: slice:: from_ref ( format) , next)
2390
+ {
2391
+ Ok ( Decoder :: RepeatFallback ( tree, dnarrow, dwide) )
2392
+ } else {
2393
+ Err ( format ! ( "canot build match treee for {:?}" , format) )
2394
+ }
2395
+ }
2337
2396
Format :: Repeat1 ( a) => {
2338
2397
if a. is_nullable ( compiler. module ) {
2339
2398
return Err ( format ! ( "cannot repeat nullable format: {a:?}" ) ) ;
@@ -2545,6 +2604,44 @@ impl Decoder {
2545
2604
}
2546
2605
Ok ( ( Value :: Seq ( v) , input) )
2547
2606
}
2607
+ Decoder :: RepeatFallback ( _tree, subset, superset) => {
2608
+ let mut accum = Vec :: new ( ) ;
2609
+ let mut incr_input = input;
2610
+
2611
+ loop {
2612
+ match subset. parse ( program, scope, incr_input) {
2613
+ Ok ( ( b, next_input) ) => {
2614
+ accum. push ( b) ;
2615
+ incr_input = next_input;
2616
+ }
2617
+ Err ( _err) => break ,
2618
+ }
2619
+ }
2620
+
2621
+ // track the last offset we reached to see if we get any further
2622
+ let checkpoint = incr_input. offset ;
2623
+
2624
+ loop {
2625
+ match superset. parse ( program, scope, incr_input) {
2626
+ Ok ( ( b, next_input) ) => {
2627
+ accum. push ( b) ;
2628
+ incr_input = next_input;
2629
+ }
2630
+ Err ( _err) => break ,
2631
+ }
2632
+ }
2633
+
2634
+ if let Err ( e) = ( Decoder :: EndOfInput ) . parse ( program, scope, incr_input) {
2635
+ return Err ( e) ;
2636
+ }
2637
+
2638
+ let fellthrough = incr_input. offset > checkpoint;
2639
+
2640
+ Ok ( (
2641
+ Value :: Fallback ( fellthrough, Box :: new ( Value :: Seq ( accum) ) ) ,
2642
+ incr_input,
2643
+ ) ) // index 1
2644
+ }
2548
2645
Decoder :: RepeatCount ( expr, a) => {
2549
2646
let mut input = input;
2550
2647
let count = expr. eval_value ( scope) . unwrap_usize ( ) ;
0 commit comments