diff --git a/.gitignore b/.gitignore index a2b9577..fec9b69 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ .vscode/ build/ -test/ # Ignore pesky macOS files .DS_Store diff --git a/CMakeLists.txt b/CMakeLists.txt index 54c3be4..7b260ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # Source files file(GLOB LIB_SOURCES "src/lib/*.c") +file(GLOB AS_SOURCES "src/as/*.c") file(GLOB EMU_SOURCES "src/emu/*.c") # Library for shared code @@ -13,8 +14,11 @@ add_library(lc3tools ${LIB_SOURCES}) target_include_directories(lc3tools PUBLIC include) # Executables +add_executable(lc3as ${AS_SOURCES}) add_executable(lc3emu ${EMU_SOURCES}) +target_include_directories(lc3as PRIVATE include/as) target_include_directories(lc3emu PRIVATE include/emu) # Link shared code and executables +target_link_libraries(lc3as lc3tools) target_link_libraries(lc3emu lc3tools) diff --git a/README.md b/README.md index 5fcf17d..8726d37 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ An implementation of the LC-3 instruction set architecture and assorted tools. | Name | Status | Notes | | ----------- | ------------- | ------------------------- | | `lc3emu` | In-progress | Emulator/Debugger | -| `lc3as` | Planned | Assembler | +| `lc3as` | In-progress | Assembler | | `lc3disas` | Planned | Disassembler | | `lc3cc` | Planned | C Compiler | diff --git a/include/as/lc3as.h b/include/as/lc3as.h new file mode 100644 index 0000000..38d0e26 --- /dev/null +++ b/include/as/lc3as.h @@ -0,0 +1,6 @@ +#ifndef __LC3AS_H +#define __LC3AS_H + +#define LC3AS_TEST 6969 + +#endif /* __LC3AS_H */ diff --git a/include/lc3tools.h b/include/lc3tools.h index 84f28a4..291049e 100644 --- a/include/lc3tools.h +++ b/include/lc3tools.h @@ -38,6 +38,8 @@ */ #define DEBUG 1 +#define TEST_DIR "../test" + /* * File separator character. 
*/ diff --git a/src/as/README.md b/src/as/README.md new file mode 100644 index 0000000..ecfbe1d --- /dev/null +++ b/src/as/README.md @@ -0,0 +1,123 @@ +# Assembler Information +## Opcodes +The table below lists all opcodes and their accepted operand formats. + +| Opcode Format | Function | RTL | +|-----------------------|---------------------------|-----------------------------------------------| +| `ADD reg,reg,reg` | Signed addition | `reg <- reg + reg; setcc` | +| `ADD reg,reg,imm` | Signed addition | `reg <- reg + imm; setcc` | +| `AND reg,reg,reg` | Bitwise AND | `reg <- reg & reg; setcc` | +| `AND reg,reg,imm` | Bitwise AND | `reg <- reg & imm; setcc` | +| `BR addr` | Branch (unconditional) | `PC <- PC^ + addr` (same as `BRnzp`) | +| `BRn addr` | Branch (negative) | `PC <- PC^ + addr` | +| `BRz addr` | Branch (zero) | `PC <- PC^ + addr` | +| `BRp addr` | Branch (positive) | `PC <- PC^ + addr` | +| `BRnz addr` | Branch (negative,zero) | `PC <- PC^ + addr` | +| `BRnp addr` | Branch (negative,positive)| `PC <- PC^ + addr` | +| `BRzp addr` | Branch (zero,positive) | `PC <- PC^ + addr` | +| `BRnzp addr` | Branch (unconditional) | `PC <- PC^ + addr` | +| `JMP reg` | Jump | `PC <- reg` | +| `JSR addr` | Call subroutine | `R7 <- PC^; PC <- PC^ + (addr << 1)` | +| `JSRR reg` | Call subroutine | `R7 <- PC^; PC <- reg` | +| `LDB reg,reg,addr` | Load byte | `reg <- mem[reg + addr]; setcc` | +| `LDW reg,reg,addr` | Load word | `reg <- mem[reg + (addr << 1)]; setcc` | +| `LDI reg,reg,addr` | Load word (indirect) | `reg <- mem[mem[reg + (addr << 1)]]; setcc` | +| `LEA reg,addr` | Load effective address | `reg <- PC^ + (addr << 1); setcc` | +| `NOT reg,reg` | Bitwise NOT | `reg <- ~reg; setcc` | +| `RET` | Return from subroutine | `PC <- R7` | +| `RTI` | Return from interrupt | TODO (it's quite complex) | +| `LSHF reg,reg,imm` | Left shift | `reg <- reg << imm; setcc` | +| `RSHFL reg,reg,imm` | Right shift (logical) | `reg <- reg >> imm; setcc` | +| `RSHFA reg,reg,imm` | 
Right shift (arithmetic) | `reg <- reg >> imm; setcc` | +| `STB reg,reg,addr` | Store byte | `mem[reg + addr] <- reg` | +| `STW reg,reg,addr` | Store word | `mem[reg + (addr << 1)] <- reg` | +| `STI reg,reg,addr` | Store word (indirect) | `mem[mem[reg + (addr << 1)]] <- reg` | +| `TRAP addr` | Call trap service routine | `R7 <- PC; PC <- mem[addr << 1]` | +| `XOR reg,reg,reg` | Bitwise XOR | `reg <- reg ^ reg; setcc` | +| `XOR reg,reg,imm` | Bitwise XOR | `reg <- reg ^ imm; setcc` | + +Notes: + * For the store instructions (`STB`, `STW`, `STI`) the first operand is the + register to store while the second operand is the base address register. + * `PC^` refers to the incremented `PC` value (`PC` is incremented by 2 during + each fetch cycle). + * `setcc` means the instruction sets the condition codes (`nzp`) used for + conditional branches. + +## Instruction Format +`LABEL: OPCODE OPERANDS ; COMMENTS` + +### Format Details +`LABEL`: + * Memory address identifier. + * Alphanumeric, case-sensitive, underscore (_) allowed. + * 20 characters maximum. + * Label declaration must be terminated with a colon (:). + * Label declaration can exist on a line of its own or preceding opcode. + +`OPCODE`: + * Instruction mnemonic. + * Case-insensitive. + * Includes LC-3 instructions and assembler pseudo-ops. + * Pseudo-ops preceded by a period (.). + +`OPERANDS`: + * Instruction operands. + * Zero or more required depending on opcode. + * Must appear on same line as opcode. + * Operands separated by a comma (,). + * For multi-operand instructions, format is `dest,srcA,srcB` + * Integer literals can be represented in decimal or hexadecimal. + * Hexadecimal indicated by preceding 'x' or '0x' (case-insensitive). + * Operand types: register, immediate, address. + * Register: + * References a general purpose register. + * Specified by 'R' followed by a digit 0-7 (e.g. `R3`). + * Immediate: + * Constant value embedded into instruction. + * Used for arithmetic operations. 
+ * Must be an integer literal. + * Address: + * References a memory location. + * Can appear as either integer literal or label name (case-sensitive). + * Register-relative, PC-relative, or absolute address embedded into + instruction. + * **Register-relative addressing** creates an absolute address by adding + the embedded value to the value of a general purpose register. + * **PC-relative addressing** creates an absolute address by adding the + embedded value to the current `PC` value. Note that `PC` is + incremented by 2 during each instruction fetch, so the `PC` value at + the time of address creation will point to the start of the *next* + instruction. + * **Absolute addressing** uses the value embedded in the instruction as + an absolute memory address. `TRAP` is the only instruction to use + absolute addressing. + +`COMMENTS`: + * Preceded by a semicolon (;). + * Everything following the semicolon until end-of-line is ignored by the assembler. + +### Format Examples +``` +START: ; label by itself +BEGIN: RET ; label w/ opcode, no operands + + ; operand types + JMP R7 ; register + BR START ; address (label) + BRnz -1 ; address (literal, dec) + TRAP 0x25 ; address (literal, hex) + NOT R0, R6 ; register & register + LEA R0, DATA0 ; register & address (as label) + LEA R1, 60 ; register & address (literal, dec) + LEA R2, 0x3C ; register & address (literal, hex) + LDW R3, R5, DATA1 ; register & register & address (label) + STW R3, R5, 2 ; register & register & address (literal) + ADD R0, R1, R2 ; register & register & register + ADD R3, R4, -2 ; register & register & immediate (dec) + ADD R5, R6, x1E ; register & register & immediate (hex) + ADD R7, R0, START ; NOT ALLOWED! 
/*
 * lc3as - LC-3 assembler (work in progress).
 *
 * Only the tokenizer is implemented so far: the program reads the assembly
 * source file named on the command line, splits it into tokens, classifies
 * each token and prints the resulting token list to stdout.
 */

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * NOTE(review): the original had a second group of two #include lines here
 * whose targets are not visible in this view (presumably the project
 * headers). Nothing in this file depends on them; restore them if needed.
 */

#define LINE_BUFFER_SIZE    512
#define TOKEN_BUFFER_SIZE   64

#define ARRLEN(a)   (sizeof(a)/sizeof(a[0]))

enum token_type
{
    T_LABEL,
    T_OPCODE,
    T_PSEUDO_OP,
    T_REGISTER,
    T_IMMEDIATE
    // TODO: string literal
};

struct token
{
    struct token *next;     /* next token in the list; NULL at the tail */

    int line;               /* 1-based source line the token was read from */
    int pos;                /* 1-based column of the token's first character */
    int len;                /* token length in the source; may exceed the
                               buffers below, which truncate */

    enum token_type type;
    char raw_str[TOKEN_BUFFER_SIZE];    /* token text as written */
    char cap_str[TOKEN_BUFFER_SIZE];    /* token text, upper-cased */
    int val;                /* register number or literal value; 0 for all
                               other token types */
};

struct source_file
{
    FILE *file;
    char line[LINE_BUFFER_SIZE];    /* current line; modified in place while
                                       tokens are being split off */
    int line_num;                   /* number of read attempts so far */
    int line_pos;                   /* index of the next unread character */
    int line_len;                   /* strlen() of the current line */
};

const char * const OPCODES[] =
{
    // NOTE: BR-BRnzp not included in this table
    "ADD",  "AND",  "JMP",  "JSR",  "JSRR", "LDB",  "LDW",  "LDI",
    "LEA",  "NOT",  "RET",  "RTI",  "LSHF", "RSHFL","RSHFA","STB",
    "STW",  "STI",  "TRAP", "XOR",

    "GETC", "HALT", "IN",   "OUT",  "PUTS", "PUTSP"
};

const char * const PSEUDO_OPS[] =
{
    ".BLKW",".END", ".FILL",".ORIG",".STRINGZ"
};

static struct token * read_token(struct source_file *src);
static enum token_type classify_token(const char *cap, int *val);
static int read_line(struct source_file *src);
static int try_read_constant(const char *s, int base, int *out);
static void s_toupper(char *s);
static int is_delim(char c);
static int is_opcode(const char *tok);

/*
 * Tokenizes the file named by argv[1] and dumps the token list to stdout.
 *
 * Returns 0 on success, 1 if no source file was given, 2 if the file could
 * not be opened.
 */
int main(int argc, char *argv[])
{
    struct source_file src;

    struct token *tok_list = NULL;  /* stays NULL when the file has no tokens */
    struct token *tail = NULL;
    struct token *curr;
    struct token *next;

    if (argc < 2)
    {
        fprintf(stderr, "error: missing source file\n");
        return 1;
    }

    memset(&src, 0, sizeof(struct source_file));
    src.file = fopen(argv[1], "rb");
    if (src.file == NULL)
    {
        fprintf(stderr, "error: failed to open source file\n");
        return 2;
    }

    /* build token list */
    while ((curr = read_token(&src)) != NULL)
    {
        if (tail == NULL)
        {
            tok_list = curr;
        }
        else
        {
            tail->next = curr;
        }
        tail = curr;
    }

    /* the whole file has been consumed; release the handle */
    fclose(src.file);
    src.file = NULL;

    /* output and free token list */
    printf(" ln:pos\tlen\ttype\t\ttoken\n");
    for (curr = tok_list; curr != NULL; curr = next)
    {
        printf("% 3d:%d\t%d\t", curr->line, curr->pos, curr->len);
        switch (curr->type)
        {
            case T_LABEL:
                printf("LABEL\t\t");
                break;
            case T_OPCODE:
                printf("OPCODE\t\t");
                break;
            case T_PSEUDO_OP:
                printf("PSEUDO_OP\t");
                break;
            case T_REGISTER:
                printf("REGISTER\t");
                break;
            case T_IMMEDIATE:
                printf("IMMEDIATE\t");
                break;
            default:
                printf("UNKNOWN\t");
                break;
        }

        printf("'%s'\n", curr->cap_str);

        /* grab the link before the node is released */
        next = curr->next;
        free(curr);
    }

    return 0;
}

/*
 * Reads the next token from 'src', pulling in further lines as needed.
 *
 * Whitespace and stray commas between tokens are skipped; a semicolon starts
 * a comment that runs to the end of the line.
 *
 * Returns a heap-allocated token the caller must free(), or NULL at end of
 * input. NULL is also returned if the allocation fails.
 */
static struct token * read_token(struct source_file *src)
{
    struct token *token;
    char *tok_head;
    char *tok_tail;
    char delim;
    size_t copy_len;

    /* find the first character of the next token */
    for (;;)
    {
        if (src->line_pos >= src->line_len)
        {
            if (read_line(src) == -1)
            {
                printf("Hit end of file! (line %d)\n", src->line_num);
                return NULL;
            }
            continue;
        }

        tok_head = &src->line[src->line_pos];

        if (*tok_head == ';' || *tok_head == '\0')
        {
            /* comment or end of buffer: nothing more on this line */
            src->line_pos = src->line_len;
            continue;
        }

        if (*tok_head == ',' || isspace((unsigned char) *tok_head))
        {
            /* separator with no token in front of it */
            src->line_pos++;
            continue;
        }

        break;
    }

    /* find the delimiter that ends the token */
    tok_tail = tok_head;
    while (!is_delim(*tok_tail))
    {
        tok_tail++;
    }
    delim = *tok_tail;
    *tok_tail = '\0';

    token = (struct token *) calloc(1, sizeof(struct token));
    if (token == NULL)
    {
        return NULL;
    }

    /* extract bookkeeping information */
    token->line = src->line_num;
    token->pos = src->line_pos + 1;
    token->len = (int) (tok_tail - tok_head);

    if (delim == ';' || delim == '\0')
    {
        /*
         * The token ran straight into a comment or the end of the buffer.
         * Keep the token, but make sure the comment text is never tokenized.
         */
        src->line_pos = src->line_len;
    }
    else
    {
        src->line_pos += token->len + 1;
    }

    /* copy with explicit termination; over-long tokens are truncated */
    copy_len = (size_t) token->len;
    if (copy_len >= TOKEN_BUFFER_SIZE)
    {
        copy_len = TOKEN_BUFFER_SIZE - 1;
    }
    memcpy(token->raw_str, tok_head, copy_len);
    token->raw_str[copy_len] = '\0';
    memcpy(token->cap_str, tok_head, copy_len);
    token->cap_str[copy_len] = '\0';
    s_toupper(token->cap_str);

    token->type = classify_token(token->cap_str, &token->val);

    return token;
}

/*
 * Determines the type of the upper-cased token 'cap'.
 *
 * For registers '*val' receives the register number (0-7), for integer
 * literals the parsed value; for every other type it is set to 0.
 *
 * Literal forms recognized: '#' + decimal, 'X' + hex, '0X' + hex and plain
 * (optionally signed) decimal. Anything that is not an opcode, pseudo-op,
 * register or literal is treated as a label.
 */
static enum token_type classify_token(const char *cap, int *val)
{
    *val = 0;

    if (is_opcode(cap))
    {
        return T_OPCODE;
    }

    if (cap[0] == '.')
    {
        return T_PSEUDO_OP;
    }

    /* exactly 'R' plus one digit 0-7, so labels like RESULT stay labels */
    if (cap[0] == 'R' && cap[1] >= '0' && cap[1] <= '7' && cap[2] == '\0')
    {
        *val = cap[1] - '0';
        return T_REGISTER;
    }

    if (cap[0] == '#' && try_read_constant(cap + 1, 10, val))
    {
        return T_IMMEDIATE;
    }
    if (cap[0] == 'X' && try_read_constant(cap + 1, 16, val))
    {
        return T_IMMEDIATE;
    }
    if (cap[0] == '0' && cap[1] == 'X' && try_read_constant(cap + 2, 16, val))
    {
        return T_IMMEDIATE;
    }
    if (try_read_constant(cap, 10, val))
    {
        return T_IMMEDIATE;
    }

    return T_LABEL;
}

/*
 * Reads the next line of the source file into src->line and resets the read
 * position. The line counter is advanced on every call, including the one
 * that hits end of file.
 *
 * Lines longer than LINE_BUFFER_SIZE - 1 characters are delivered in pieces
 * by fgets(), each piece counting as a line.
 *
 * Returns the length of the line, or -1 at end of file or on read error.
 */
static int read_line(struct source_file *src)
{
    src->line_num++;
    if (fgets(src->line, LINE_BUFFER_SIZE, src->file) == NULL)
    {
        return -1;
    }

    src->line_pos = 0;
    src->line_len = (int) strlen(src->line);

    return src->line_len;
}

/*
 * Parses the whole of 's' as an integer in the given base.
 *
 * Returns 1 and stores the value in '*out' on success. Returns 0 and leaves
 * '*out' untouched if 's' is empty, contains trailing characters that are
 * not part of the number, or does not fit in an int.
 */
static int try_read_constant(const char *s, int base, int *out)
{
    char *end;
    long parsed;

    errno = 0;
    parsed = strtol(s, &end, base);
    if (end == s || *end != '\0')
    {
        return 0;
    }
    if (errno == ERANGE || parsed < INT_MIN || parsed > INT_MAX)
    {
        return 0;
    }

    *out = (int) parsed;
    return 1;
}

/*
 * Upper-cases the NUL-terminated string 's' in place.
 */
static void s_toupper(char *s)
{
    for (; *s != '\0'; s++)
    {
        /* toupper() requires a value representable as unsigned char */
        *s = (char) toupper((unsigned char) *s);
    }
}

/*
 * Returns 1 if 'c' ends a token (comma, semicolon, blank, tab, CR, LF or
 * NUL), 0 otherwise.
 */
static int is_delim(char c)
{
    switch (c)
    {
        case ',':
        case ';':
        case ' ':
        case '\t':
        case '\r':
        case '\n':
        case '\0':
            return 1;
    }

    return 0;
}

/*
 * Returns 1 if the upper-cased token 'tok' is an instruction mnemonic,
 * 0 otherwise.
 *
 * The branch family is matched structurally: "BR" followed by an optional
 * 'N', an optional 'Z' and an optional 'P', in that order. Every other
 * mnemonic is looked up in OPCODES.
 */
static int is_opcode(const char *tok)
{
    size_t i;

    /* BR and friends */
    if (strncmp(tok, "BR", 2) == 0)
    {
        tok += 2;
        if (*tok == 'N')
        {
            tok++;
        }
        if (*tok == 'Z')
        {
            tok++;
        }
        if (*tok == 'P')
        {
            tok++;
        }

        /* anything left over (e.g. "BRZZ", "BRPN") is not a branch */
        return *tok == '\0';
    }

    /* all else */
    for (i = 0; i < ARRLEN(OPCODES); i++)
    {
        if (strcmp(tok, OPCODES[i]) == 0)
        {
            return 1;
        }
    }

    return 0;
}
+ BRz ZERO_INPUT + + ; Initialize loop counters + ADD R5, R3, R2 ; i = input - 2, subtract 2 because 'sum' initialized to 'input' + ADD R6, R0, R5 ; k = i + +MULTIPLY_LOOP: + ; Computes 'amt*k' and adds the result to 'sum' + BRnz STORE + ADD R3, R3, R4 ; sum = sum + amt + ADD R6, R6, R1 ; k = k - 1 + BRnzp MULTIPLY_LOOP + +STORE: + STR R3, R0, RESULT ; result = sum + +NEXT_FACTORIAL: + ; Decrement loop counters and update multiplcand + LDR R4, R0, RESULT ; amt = sum + ADD R5, R5, R1 ; i = i - 1 + BRnz HALT ; if i < 1; halt + ADD R6, R0, R5 ; k = i + nop + BRnzp MULTIPLY_LOOP + +ZERO_INPUT: + ; Special case for 0!, stores 1 in the result. + LDR R3, R0, POS_ONE + STR R3, R0, RESULT + +HALT: + BRnzp HALT + + +; Data input/output +INPUT: .FILL 5 +RESULT: .FILL 0 + +; Constants +POS_ONE: .FILL 1 +NEG_ONE: .FILL -1 +NEG_TWO: .FILL -2 diff --git a/test/as/halt.asm b/test/as/halt.asm new file mode 100644 index 0000000..c0cb0a4 --- /dev/null +++ b/test/as/halt.asm @@ -0,0 +1,26 @@ +.ORIGIN 0x3000 + +MAIN: + AND R3, R3, R3 + ADD R3, R3, 1 + +HALT: + ; Set up mask + AND R5, R5, 0 + ADD R5, R5, 1 + LSHF R5, R5, 15 + NOT R5, R5 + + ; Load MCR + LEA R6, MCR + LDI R7, R6, 0 + + ; Turn off clock enable bit + AND R7, R7, R5 + STI R7, R6, 0 + +_HALT_LOOP: + BR _HALT_LOOP + +CLOCK_EN: .FILL 0x8000 +MCR: .FILL 0xFFFE diff --git a/test/as/tokenizer2.asm b/test/as/tokenizer2.asm new file mode 100644 index 0000000..6b93bc1 --- /dev/null +++ b/test/as/tokenizer2.asm @@ -0,0 +1,21 @@ +LABEL1 ; label by itself +RTI ; instr by itself +.END ; macro by itself + +JMP R6 ; instr w/ one operand (reg) +BR LABEL1 ; instr w/ one operand (label) +TRAP x25 ; instr w/ one operand (imm) +.FILL LABEL1 ; macro w/ one operand (label) +.FILL xBEEF ; macro w/ one operand (imm) + +ADD R0, R1, R2 ; instr w/ many operands (reg) +ADD R3, R4, #3 ; instr w/ many operands (positive imm) +ADD R3, R4, #-6 ; instr w/ many operands (negative imm) +ADD R5, R6, xE ; instr w/ many operands (hex) +ADD R4,R5,#3 ; instr w/ 
minimal spacing & positive imm + LEA R7, LABEL ; instr w/ leading space and label operand +ADD R4, R5, #14 ; instr w/ weird spacing (extra whitespace after ,) +ADD R0 , R5 , xA ; instr w/ weird spacing (extra whitespace before ,) +ADD R7 , R0 ,R6 ; instr w/ weird spacing (whitespace mixture) +ADD R4 ,R2 , R1 ; instr w/ weird spacing (whitespace mixture) + ADD R7, R6, R5 ; instr w/ weird spacing (tabs) diff --git a/test/as/tokenizer_basic.asm b/test/as/tokenizer_basic.asm new file mode 100644 index 0000000..c5a628a --- /dev/null +++ b/test/as/tokenizer_basic.asm @@ -0,0 +1,21 @@ +START: ; label by itself +BEGIN: RET ; label w/ opcode, no operands + + ; operand types + JMP R7 ; register + BR START ; address (label) + BRnz -1 ; address (literal, dec) + TRAP 0x25 ; address (literal, hex) + NOT R0, R6 ; register & register + LEA R0, DATA0 ; register & address (as label) + LEA R1, 60 ; register & address (literal, dec) + LEA R2, 0x3C ; register & address (literal, hex) + LDW R3, R5, DATA1 ; register & register & address (label) + STW R3, R5, 2 ; register & register & address (literal) + ADD R0, R1, R2 ; register & register & register + ADD R3, R4, -2 ; register & register & immediate (dec) + ADD R5, R6, x1E ; register & register & immediate (hex) + ADD R7, R0, START ; NOT ALLOWED! 
ADD does not take an address + +DATA0: .FILL 0xBEEF ; pseudo-op w/ hex operand +DATA1: .FILL 1337 ; pseudo-op w/ dec operand diff --git a/test/emu/factorial_reg.obj b/test/emu/factorial_reg.obj new file mode 100644 index 0000000..3b660f9 Binary files /dev/null and b/test/emu/factorial_reg.obj differ diff --git a/test/emu/foo.asm.obj b/test/emu/foo.asm.obj new file mode 100644 index 0000000..72f6ded Binary files /dev/null and b/test/emu/foo.asm.obj differ diff --git a/tools/emu/dbg.sh b/tools/emu/dbg.sh new file mode 100755 index 0000000..5593818 --- /dev/null +++ b/tools/emu/dbg.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +emu_pid=$(ps -C | awk -F ' ' '{ if ($4 ~ /lc3emu/) print $1; }') +echo $emu_pid +sudo gdb -p $emu_pid diff --git a/tools/emu/rundbg.sh b/tools/emu/rundbg.sh new file mode 100755 index 0000000..8d9da03 --- /dev/null +++ b/tools/emu/rundbg.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)" + +open -a Terminal $SCRIPT_DIR/../../build/bin/lc3emu +ping -c 2 127.0.0.1 # delay to allow Terminal to launch +$SCRIPT_DIR/../test/dbg.sh