diff --git a/Cargo.toml b/Cargo.toml index 32382711..ec0b023d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,8 @@ log-serial = [] # gets you most of the code size reduction, without losing _all_ debugging. log-panic = ["log-serial"] integration_tests = [] +# Support building the firmware as a BIOS ROM (i.e. starting in real mode). +rom = [] [dependencies] bitflags = "1.2" diff --git a/layout.ld b/layout.ld index 7ab6066f..6e597a22 100644 --- a/layout.ld +++ b/layout.ld @@ -1,14 +1,17 @@ ENTRY(rust64_start) /* This firmware doesn't use the ELF entrypoint */ +/* Loaders like to put stuff in low memory (< 1M), so we don't use it. */ +ram_min = 1M; +/* ram32.s only maps the first 2 MiB, which must include our whole program. */ +ram_max = 2M; + PHDRS { ram PT_LOAD FILEHDR PHDRS ; note PT_NOTE ; + rom PT_LOAD AT(ram_max) ; } -/* Loaders like to put stuff in low memory (< 1M), so we don't use it. */ -ram_min = 1M; - SECTIONS { /* Mapping the program headers and note into RAM makes the file smaller. */ @@ -24,6 +27,7 @@ SECTIONS .text32 : { *(.text32) } .data : { *(.data .data.*) } data_size = . - data_start; + file_size = . - ram_min; /* The BSS section isn't mapped from file data. It is just zeroed in RAM. */ .bss : { @@ -35,8 +39,34 @@ SECTIONS /* Our stack grows down and is page-aligned. TODO: Add stack guard pages. */ .stack (NOLOAD) : ALIGN(4K) { . += 64K; } stack_start = .; - /* ram32.s only maps the first 2 MiB, and that must include the stack. */ - ASSERT((. <= 2M), "Stack overflows initial identity-mapped memory region") + ASSERT((. <= ram_max), "Stack overflows initial identity-mapped region") + + /* This is correct because all of the code sections have alignment 16. */ + rom_size = ALIGN(SIZEOF(.gdt32), 16) + ALIGN(SIZEOF(.rom32), 16) + + ALIGN(SIZEOF(.rom16), 16) + ALIGN(SIZEOF(.reset), 16); + /* Our file's length (RAM code/data + ROM) without any additional padding */ + file_size += rom_size; + /* QEMU requires the ROM size to be a multiple of 64K. 
To achieve this for + our `sstrip`ed binary, we insert enough padding before the ROM code, + so that the end of the ROM code falls on a 64K file boundary. */ + pad_size = ALIGN(file_size, 64K) - file_size; + + /* When using the ROM, the entire firmware is loaded right below 4 GiB. + We work backwards from the end to figure out where to place the ROM code, + 32-bit GDT, padding, and the remainder of the firmware. */ + pad_start = (1 << 32) - rom_size - pad_size; + /* The remainder of the firmware code/data expects to be run at addresses + [data_start, data_start + data_size), but will initially be located in + ROM, at addresses [rom_data_start, rom_data_start + data_size). As the + padding comes right after the remainder of the firmware, we have: */ + rom_data_start = pad_start - data_size; + + /* Only insert the padding if we are building as a ROM (to save size). */ + .pad pad_start : { . += rom_size ? pad_size : 0; } :NONE + .gdt32 : { *(.gdt32) } :rom + .rom32 : { *(.rom32) } + .rom16 : { *(.rom16) } + .reset : { KEEP(*(.reset)) } /* Strip symbols from the output binary (comment out to get symbols) */ /DISCARD/ : { diff --git a/src/asm/mod.rs b/src/asm/mod.rs index 9d3ccc06..5cc78638 100644 --- a/src/asm/mod.rs +++ b/src/asm/mod.rs @@ -1 +1,6 @@ +#[cfg(feature = "rom")] +global_asm!(include_str!("reset.s")); + +global_asm!(include_str!("rom16.s")); +global_asm!(include_str!("rom32.s")); global_asm!(include_str!("ram32.s")); diff --git a/src/asm/reset.s b/src/asm/reset.s new file mode 100644 index 00000000..f6eeb2bf --- /dev/null +++ b/src/asm/reset.s @@ -0,0 +1,10 @@ +.section .reset, "ax" +.code16 + +# The reset vector must go at the end of ROM, exactly 16 bytes from the end. 
+.align 16 +reset_vec: # 0x0_FFFF_FFF0 + jmp rom16_start + +.align 16, 0 +reset_end: # 0x1_0000_0000 \ No newline at end of file diff --git a/src/asm/rom16.s b/src/asm/rom16.s new file mode 100644 index 00000000..9cd86993 --- /dev/null +++ b/src/asm/rom16.s @@ -0,0 +1,26 @@ +.section .rom16, "ax" +.code16 + +.align 16 +rom16_start: + # Order of instructions from Intel SDM 9.9.1 "Switching to Protected Mode" + # Step 1: Disable interrupts + cli + + # Step 2: Load the GDT + # We are currently in 16-bit real mode. To enter 32-bit protected mode, we + # need to load 32-bit code/data segments into our GDT. The gdt32 in ROM is + # at too high of an address (right below 4G) for the data segment to reach. + # + # But we can load gdt32 via the code segment. After a reset, the base of + # the CS register is 0xFFFF0000, which means we can access gdt32. + lgdtl %cs:(GDT32_PTR - 0xFFFF0000) + + # Step 3: Set CR0.PE (Protected Mode Enable) + movl %cr0, %eax + orb $0b00000001, %al # Set bit 0 + movl %eax, %cr0 + + # Step 4: Far JMP to change execution flow and serialize the processor. + # Set CS to a 32-bit Code-Segment and jump to 32-bit code. + ljmpl $0x08, $rom32_start diff --git a/src/asm/rom32.s b/src/asm/rom32.s new file mode 100644 index 00000000..ea3f14ea --- /dev/null +++ b/src/asm/rom32.s @@ -0,0 +1,38 @@ +.section .rom32, "ax" +.code32 + +.align 16 +rom32_start: + # Now that we are in 32-bit mode, set up all the Data-Segments to be 32-bit. 
+ movw $0x10, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw %ax, %fs + movw %ax, %gs + + # Needed for the REP instructions below + cld + +copy_rom_to_ram: + # This is equivalent to: memcpy(data_start, rom_data_start, data_size) + movl $rom_data_start, %esi + movl $data_start, %edi + movl $data_size, %ecx + rep movsb (%esi), (%edi) + +zero_bss_in_ram: + # This is equivalent to: memset(bss_start, 0, bss_size) + xorb %al, %al + movl $bss_start, %edi + movl $bss_size, %ecx + rep stosb %al, (%edi) + +jump_to_ram: + # Zero out %ebx, as we don't have a PVH StartInfo struct. + xorl %ebx, %ebx + + # Jumping all that way from ROM (~4 GiB) to RAM (~1 MiB) is too far for a + # 32-bit relative jump, so we use a 32-bit absolute jump. + movl $ram32_start, %eax + jmp *%eax diff --git a/src/gdt.rs b/src/gdt.rs index 5b64713d..669889b7 100644 --- a/src/gdt.rs +++ b/src/gdt.rs @@ -26,6 +26,14 @@ bitflags::bitflags! { const COMMON = Self::ACCESSED.bits | Self::USER_SEGMENT.bits | Self::PRESENT.bits; // BIT32 must be 0, all other bits (not yet mentioned) are ignored. const CODE64 = Self::COMMON.bits | Self::EXECUTABLE.bits | Self::BIT64.bits; + + // All 32-bit segments have base = 0, limit = 4G = (0xF_FFFF + 1)*4K + const MAX_LIMIT = Self::LIMIT_0_15.bits | Self::LIMIT_16_19.bits | Self::GRANULARITY.bits; + const COMMON32 = Self::COMMON.bits | Self::MAX_LIMIT.bits | Self::BIT32.bits; + // We set READABLE because the ROM code reads data via cs. + const CODE32 = Self::COMMON32.bits | Self::READABLE.bits | Self::EXECUTABLE.bits; + // We set WRITABLE so the ROM code can write ROM data into RAM. + const DATA32 = Self::COMMON32.bits | Self::WRITABLE.bits; } } @@ -51,3 +59,11 @@ impl Pointer { #[no_mangle] static GDT64_PTR: Pointer = Pointer::new(&GDT64); static GDT64: [Descriptor; 2] = [Descriptor::empty(), Descriptor::CODE64]; + +// Our 32-bit GDT lives in ROM, so it can be directly used by the ROM code. 
We +// should never reference or access this GDT when we are running in RAM. +#[no_mangle] +#[link_section = ".gdt32"] +static GDT32_PTR: Pointer = Pointer::new(&GDT32); +#[link_section = ".gdt32"] +static GDT32: [Descriptor; 3] = [Descriptor::empty(), Descriptor::CODE32, Descriptor::DATA32];