Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to add comments through the sym file #17

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@ A Game Boy ROM disassembler.

- Generates assembly code compatible with RGBDS (v0.3.8+ recommended, see [Notes](#notes))
- Supports ROMs with multiple banks
- Supports .sym files to define labels, code, data, text and image blocks
- Supports .sym files to define labels, code, data, text, image blocks, and comments
- Outputs a makefile to rebuild the ROM
- Uses defines from hardware.inc v2.7 for hardware registers ([source](https://github.com/tobiasvl/hardware.inc))
- Slow on large ROMs
@@ -41,6 +41,8 @@ All values (except for image widths) should be in hexadecimal. Entries start wi

Block types can be defined by using the ```.code```, ```.data```, ```.text```, and ```.image``` magic labels, followed by the length of the block in bytes.

Comments to add to the assembly code should start with `;;`, followed by the bank and address.

### Code

Adding a label for some code:
@@ -102,6 +104,24 @@ Resulting image:

![Imgur](https://i.imgur.com/iX5FCXL.png)

### Comments

You may like to add comments to the resulting assembly files, for instance, if you are only able to share the symbol file for licensing reasons:

```
01:ad43 Check_Health
;; 01:a4d3 Updates player health if they're being hit
```

Resulting assembly file:

```
Check_Health:
; Updates player health if they're being hit
push BC
...
```

## Notes

- For constant expressions, RGBDS will by default optimise instructions like ```LD [$FF40],a``` to ```LDH [$FF00+40],a```, so these are encoded as data bytes using a macro to ensure exact reproduction of the original ROM (thanks to ISSOtm). RGBDS >= v0.3.7 has an option to disable this optimisation. Use ```--disable-auto-ldh``` with mgbdis to disable the macro.
83 changes: 69 additions & 14 deletions mgbdis.py
Original file line number Diff line number Diff line change
@@ -231,6 +231,7 @@ def add_target_address(self, instruction_name, address):

def resolve_blocks(self):
blocks = self.symbols.get_blocks(self.bank_number)
comments = self.symbols.get_comments(self.bank_number)
block_start_addresses = sorted(blocks.keys())
resolved_blocks = dict()

@@ -253,22 +254,25 @@ def resolve_blocks(self):
'type': block['type'],
'length': end_address - start_address,
'arguments': block['arguments'],
'comments': comments,
}

if next_start_address is None and (end_address != self.memory_base_address + 0x4000):
# no more blocks and didn't finish at the end of the block, so finish up with a code block
resolved_blocks[end_address] = {
'type': 'code',
'length': (self.memory_base_address + 0x4000) - end_address,
'arguments': None
'arguments': None,
'comments': comments,
}

if next_start_address is not None and end_address < next_start_address:
# we have another block, but there is a gap until the next block, so fill in the gap with a code block
resolved_blocks[end_address] = {
'type': 'code',
'length': next_start_address - end_address,
'arguments': None
'arguments': None,
'comments': comments,
}

self.blocks = resolved_blocks
@@ -367,6 +371,10 @@ def format_data(self, data):
return self.format_instruction(self.style['db'], data)


def format_comments(self, comments):
return '\n'.join(comments)


def append_output(self, text):
self.output.append(text)

@@ -401,27 +409,26 @@ def disassemble(self, rom, first_pass = False):
start_address = block_start_addresses[index]
block = self.blocks[start_address]
end_address = start_address + block['length']
self.disassemble_block_range[block['type']](rom, self.rom_base_address + start_address, self.rom_base_address + end_address, block['arguments'])
self.disassemble_block_range[block['type']](rom, self.rom_base_address + start_address, self.rom_base_address + end_address, block['arguments'], block['comments'])
self.append_empty_line_if_none_already()

return '\n'.join(self.output)


def process_code_in_range(self, rom, start_address, end_address, arguments = None):
def process_code_in_range(self, rom, start_address, end_address, arguments = None, comments = None):
if not self.first_pass and debug:
print('Disassembling code in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))

self.pc = start_address
while self.pc < end_address:
instruction = self.disassemble_at_pc(rom, end_address)
instruction = self.disassemble_at_pc(rom, end_address, comments)


def disassemble_at_pc(self, rom, end_address):
def disassemble_at_pc(self, rom, end_address, comments):
pc = self.pc
pc_mem_address = rom_address_to_mem_address(pc)
length = 1
opcode = rom.data[pc]
comment = None
operands = None
operand_values = list()

@@ -600,8 +607,10 @@ def disassemble_at_pc(self, rom, end_address):
if len(labels):
self.append_labels_to_output(labels)

if comment is not None:
self.append_output(comment)
for offset in range(length):
address = pc_mem_address + offset
if address in comments:
self.append_output(self.format_comments(comments[address]))

instruction_bytes = rom.data[pc:pc + length]
self.append_output(self.format_instruction(instruction_name, operand_values, pc_mem_address, instruction_bytes))
@@ -621,7 +630,7 @@ def disassemble_at_pc(self, rom, end_address):
self.append_output('')


def process_data_in_range(self, rom, start_address, end_address, arguments = None):
def process_data_in_range(self, rom, start_address, end_address, arguments = None, comments = None):
if not self.first_pass and debug:
print('Outputting data in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))

@@ -639,6 +648,9 @@ def process_data_in_range(self, rom, start_address, end_address, arguments = Non

self.append_labels_to_output(labels)

if comments and address in comments:
self.append_output(self.format_comments(comments[address]))

values.append(hex_byte(rom.data[address]))

# output max of 16 bytes per line, and ensure any remaining values are output
@@ -647,7 +659,7 @@ def process_data_in_range(self, rom, start_address, end_address, arguments = Non
values = list()


def process_text_in_range(self, rom, start_address, end_address, arguments = None):
def process_text_in_range(self, rom, start_address, end_address, arguments = None, comments = None):
if not self.first_pass and debug:
print('Outputting text in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))

@@ -670,6 +682,9 @@ def process_text_in_range(self, rom, start_address, end_address, arguments = Non

self.append_labels_to_output(labels)

if comments and address in comments:
self.append_output(self.format_comments(comments[address]))

byte = rom.data[address]
if byte >= 0x20 and byte < 0x7F:
text += chr(byte)
@@ -685,13 +700,16 @@ def process_text_in_range(self, rom, start_address, end_address, arguments = Non
if len(values):
self.append_output(self.format_data(values))

def process_image_in_range(self, rom, start_address, end_address, arguments = None):
def process_image_in_range(self, rom, start_address, end_address, arguments = None, comments = None):
if not self.first_pass and debug:
print('Outputting image in range: {} - {}'.format(hex_word(start_address), hex_word(end_address)))

if self.first_pass:
return

if comments and start_address in comments:
self.append_output(self.format_comments(comments[start_address]))

mem_address = rom_address_to_mem_address(start_address)
labels = self.get_labels_for_non_code_address(mem_address)
if len(labels):
@@ -710,17 +728,49 @@ class Symbols:
def __init__(self):
self.symbols = dict()
self.blocks = dict()
self.comments = dict()

def load_sym_file(self, symbols_path):
f = open(symbols_path, 'r')

for line in f:
# ignore comments and empty lines
if line[0] != ';' and len(line.strip()):
if line[:3] == ';; ':
self.add_comment(line)
# ignore normal comments and empty lines
elif line[0] != ';' and len(line.strip()):
self.add_symbol_definition(line)

f.close()

def add_comment(self, comment):
split_comment = comment.split(maxsplit=2)[1:]
if len(split_comment) != 2:
return

location, body = split_comment
try:
bank, address = location.split(':')
bank = int(bank, 16)
address = int(address, 16)
except:
# Ignore comments that don't have a location
return

if body[-1] == '\n':
body = body[:-1]

if not len(body):
# Assume this is a blank comment for formatting
# Don't include whitespace at the end of the line
body = ';'
else:
body = '; {}'.format(body)

bank_comments = self.get_comments(bank)
if address not in bank_comments:
bank_comments[address] = []

bank_comments[address].append(body)

def add_symbol_definition(self, symbol_def):
try:
@@ -808,6 +858,11 @@ def get_blocks(self, bank):

return self.blocks[bank]

def get_comments(self, bank):
if bank not in self.comments:
self.comments[bank] = dict()
return self.comments[bank]

class ROM:

def __init__(self, rom_path, style):