@@ -224,14 +224,23 @@ def extract_comp_dir_map(text):
224224 comp_dir_pattern = re .compile (r"DW_AT_comp_dir\s+\(\"([^\"]+)\"\)" )
225225
226226 map_stmt_list_to_comp_dir = {}
227- chunks = compile_unit_pattern .split (text ) # DW_TAG_compile_unit
228- for chunk in chunks [1 :]:
229- stmt_list_match = stmt_list_pattern .search (chunk ) # DW_AT_stmt_list
227+ iterator = compile_unit_pattern .finditer (text )
228+ current_match = next (iterator , None )
229+
230+ while current_match :
231+ next_match = next (iterator , None )
232+ start = current_match .end ()
233+ end = next_match .start () if next_match else len (text )
234+
235+ stmt_list_match = stmt_list_pattern .search (text , start , end )
230236 if stmt_list_match is not None :
231237 stmt_list = stmt_list_match .group (1 )
232- comp_dir_match = comp_dir_pattern .search (chunk ) # DW_AT_comp_dir
238+ comp_dir_match = comp_dir_pattern .search (text , start , end )
233239 comp_dir = decode_octal_encoded_utf8 (comp_dir_match .group (1 )) if comp_dir_match is not None else ''
234240 map_stmt_list_to_comp_dir [stmt_list ] = comp_dir
241+
242+ current_match = next_match
243+
235244 return map_stmt_list_to_comp_dir
236245
237246
@@ -313,54 +322,60 @@ def extract_func_ranges(text):
313322 # DW_AT_high_pc (0x00000083)
314323 # ...
315324
316- tag_pattern = re .compile (r'\r?\n(?=0x[0-9a-f]+:)' )
317- subprogram_pattern = re .compile (r"0x[0-9a-f]+:\s+DW_TAG_subprogram" )
318- inlined_pattern = re .compile (r"0x[0-9a-f]+:\s+DW_TAG_inlined_subroutine" )
325+ # Pattern to find the start of the NEXT DWARF tag (boundary marker)
326+ next_tag_pattern = re .compile (r'\n0x[0-9a-f]+:' )
327+ # Pattern to find DWARF tags for functions (Subprogram or Inlined) directly
328+ func_pattern = re .compile (r'DW_TAG_(?:subprogram|inlined_subroutine)' )
329+
319330 low_pc_pattern = re .compile (r'DW_AT_low_pc\s+\(0x([0-9a-f]+)\)' )
320331 high_pc_pattern = re .compile (r'DW_AT_high_pc\s+\(0x([0-9a-f]+)\)' )
321332 abstract_origin_pattern = re .compile (r'DW_AT_abstract_origin\s+\(0x[0-9a-f]+\s+"([^"]+)"\)' )
322333 linkage_name_pattern = re .compile (r'DW_AT_linkage_name\s+\("([^"]+)"\)' )
323334 name_pattern = re .compile (r'DW_AT_name\s+\("([^"]+)"\)' )
324335 specification_pattern = re .compile (r'DW_AT_specification\s+\(0x[0-9a-f]+\s+"([^"]+)"\)' )
325336
326- func_ranges = []
327- dw_tags = tag_pattern .split (text )
328-
329- def get_name_from_tag (tag ):
330- m = linkage_name_pattern .search (tag ) # DW_AT_linkage_name
337+ def get_name_from_tag (start , end ):
338+ m = linkage_name_pattern .search (text , start , end )
331339 if m :
332340 return m .group (1 )
333- m = name_pattern .search (tag ) # DW_AT_name
341+ m = name_pattern .search (text , start , end )
334342 if m :
335343 return m .group (1 )
336344 # If name is missing, check for DW_AT_specification annotation
337- m = specification_pattern .search (tag )
345+ m = specification_pattern .search (text , start , end )
338346 if m :
339347 return m .group (1 )
340348 return None
341349
342- for tag in dw_tags :
343- is_subprogram = subprogram_pattern .search (tag ) # DW_TAG_subprogram
344- is_inlined = inlined_pattern .search (tag ) # DW_TAG_inlined_subroutine
350+ func_ranges = []
351+ for match in func_pattern .finditer (text ):
352+ # Search from the end of the tag name (e.g. after "DW_TAG_subprogram").
353+ # Attributes are expected to follow.
354+ search_start = match .end ()
355+
356+ # Search until the beginning of the next tag
357+ m_next = next_tag_pattern .search (text , search_start )
358+ search_end = m_next .start () if m_next else len (text )
359+
360+ name = None
361+ low_pc = None
362+ high_pc = None
363+ m = low_pc_pattern .search (text , search_start , search_end )
364+ if m :
365+ low_pc = int (m .group (1 ), 16 )
366+ m = high_pc_pattern .search (text , search_start , search_end )
367+ if m :
368+ high_pc = int (m .group (1 ), 16 )
345369
346- if is_subprogram or is_inlined :
347- name = None
348- low_pc = None
349- high_pc = None
350- m = low_pc_pattern .search (tag ) # DW_AT_low_pc
351- if m :
352- low_pc = int (m .group (1 ), 16 )
353- m = high_pc_pattern .search (tag ) # DW_AT_high_pc
370+ if 'DW_TAG_subprogram' in match .group (0 ):
371+ name = get_name_from_tag (search_start , search_end )
372+ else : # is_inlined
373+ m = abstract_origin_pattern .search (text , search_start , search_end )
354374 if m :
355- high_pc = int (m .group (1 ), 16 )
356- if is_subprogram :
357- name = get_name_from_tag (tag )
358- else : # is_inlined
359- m = abstract_origin_pattern .search (tag ) # DW_AT_abstract_origin
360- if m :
361- name = m .group (1 )
362- if name and low_pc is not None and high_pc is not None :
363- func_ranges .append (FuncRange (name , low_pc , high_pc ))
375+ name = m .group (1 )
376+
377+ if name and low_pc is not None and high_pc is not None :
378+ func_ranges .append (FuncRange (name , low_pc , high_pc ))
364379
365380 # Demangle names
366381 all_names = [item .name for item in func_ranges ]
@@ -401,9 +416,23 @@ def read_dwarf_info(wasm, options):
401416 line_pattern = re .compile (r"\n0x([0-9a-f]+)\s+(\d+)\s+(\d+)\s+(\d+)(.*?end_sequence)?" )
402417
403418 entries = []
404- debug_line_chunks = debug_line_pattern .split (output )
405- map_stmt_list_to_comp_dir = extract_comp_dir_map (debug_line_chunks [0 ])
406- for stmt_list , line_chunk in zip (debug_line_chunks [1 ::2 ], debug_line_chunks [2 ::2 ], strict = True ):
419+ iterator = debug_line_pattern .finditer (output )
420+ try :
421+ current_match = next (iterator )
422+ debug_info_end = current_match .start () # end of .debug_info contents
423+ except StopIteration :
424+ debug_info_end = len (output )
425+
426+ debug_info = output [:debug_info_end ] # .debug_info contents
427+ map_stmt_list_to_comp_dir = extract_comp_dir_map (debug_info )
428+
429+ while current_match :
430+ next_match = next (iterator , None )
431+
432+ stmt_list = current_match .group (1 )
433+ start = current_match .end ()
434+ end = next_match .start () if next_match else len (output )
435+
407436 comp_dir = map_stmt_list_to_comp_dir .get (stmt_list , '' )
408437
409438 # include_directories[ 1] = "/Users/yury/Work/junk/sqlite-playground/src"
@@ -422,16 +451,16 @@ def read_dwarf_info(wasm, options):
422451 # 0x0000000000000011 28 0 1 0 0 is_stmt
423452
424453 include_directories = {'0' : comp_dir }
425- for dir in include_dir_pattern .finditer (line_chunk ):
454+ for dir in include_dir_pattern .finditer (output , start , end ):
426455 include_directories [dir .group (1 )] = os .path .join (comp_dir , decode_octal_encoded_utf8 (dir .group (2 )))
427456
428457 files = {}
429- for file in file_pattern .finditer (line_chunk ):
458+ for file in file_pattern .finditer (output , start , end ):
430459 dir = include_directories [file .group (3 )]
431460 file_path = os .path .join (dir , decode_octal_encoded_utf8 (file .group (2 )))
432461 files [file .group (1 )] = file_path
433462
434- for line in line_pattern .finditer (line_chunk ):
463+ for line in line_pattern .finditer (output , start , end ):
435464 entry = {'address' : int (line .group (1 ), 16 ), 'line' : int (line .group (2 )), 'column' : int (line .group (3 )), 'file' : files [line .group (4 )], 'eos' : line .group (5 ) is not None }
436465 if not entry ['eos' ]:
437466 entries .append (entry )
@@ -444,12 +473,14 @@ def read_dwarf_info(wasm, options):
444473 else :
445474 entries .append (entry )
446475
476+ current_match = next_match
477+
447478 remove_dead_entries (entries )
448479
449480 # return entries sorted by the address field
450481 entries = sorted (entries , key = lambda entry : entry ['address' ])
451482
452- func_ranges = extract_func_ranges (debug_line_chunks [ 0 ] )
483+ func_ranges = extract_func_ranges (debug_info )
453484 return entries , func_ranges
454485
455486
0 commit comments