diff --git a/twasm/README.md b/twasm/README.md index 91a7eb3..de73e90 100644 --- a/twasm/README.md +++ b/twasm/README.md @@ -21,8 +21,6 @@ I want to compile Bootler and Twasm with the Twasm assembler | stack (rsp) | +------------------------+ | input | -+------------------------+ <- this is lined up to a sector -| | <- and this is less than a sector +------------------------+ | assembler | +------ 0x00010000 ------+ @@ -37,11 +35,11 @@ each word represents a token on the token table. each token gets loaded into the token table with the following form: ``` -+----------+-----------------------+ -| 31 16 | 15 0 | -+----------+-----------------------+ -| reserved | token id | -+----------+-----------------------+ ++----------+----------+ +| 31 16 | 15 0 | ++----------+----------+ +| reserved | token id | ++----------+----------+ ``` ### token IDs @@ -152,4 +150,5 @@ supported tokens are listed below | - | 0x0063 | | | * | 0x0064 | | | / | 0x0065 | | +| | 0xFEXX | token terminator byte as token, where `XX` is the byte | | | 0xFFFF | unrecognised token | diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index a3dea27..2e5710c 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -1,11 +1,13 @@ LOAD_ADDR equ 0x00010000 ; address this program is loaded at -TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at -TOKEN_TABLE_SIZE equ 0x1000 ; max length of table - TEST_ARENA_ADDR equ 0x00050000 ; address to run tests at TEST_ARENA_SIZE equ 0x1000 ; maximum size tests can use +TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at +TOKEN_TABLE_SIZE equ 0x1000 ; max length of table +TOKEN_TABLE_ENTRY_SIZE equ 8 ; size of token table entry; a LOT of things + ; may break if this ever changes + OUTPUT_ADDR equ 0x00070000 ; address of outputed binary OUTPUT_SIZE equ 0x1000 ; max length of outputed binary @@ -13,6 +15,8 @@ STACK_ADDR equ 0x00060000 ; address to put the 64-bit stack at UNRECOGNISED_TOKEN_ID equ 0xFFFF ; id of an unrecognised 
token
 
+TEST_LINE_LENGTH equ 80                ; right border of test suite results
+
 [bits 64]
 [org LOAD_ADDR]
 
@@ -26,12 +30,70 @@ start:
 
   call clear_token_table
 
+  mov rdi, program                      ; -> program
+  mov rsi, [program.size]               ; = size of program (qword load; program.size must be stored as a qword)
+  call tokenise
+
   jmp halt
 
 ; ------------------------------------------------------------------------------
 ; tokenising
 ; ------------------------------------------------------------------------------
 
+; ------------------------------------------------------------------------------
+; tokenise
+; TODO write tests
+;
+; description:
+;   identifies each token of the program in turn and stores it in the token table;
+;   it's probably desirable to clear the token table before calling this function
+;
+; parameters:
+;   rdi -> first byte of program
+;   rsi = size of program in bytes (no token starts at or past rdi + rsi)
+; ------------------------------------------------------------------------------
+
+tokenise:
+  add rsi, rdi                  ; rsi -> one past the last byte of program
+  xor rcx, rcx                  ; number of tokens processed
+  .loop:
+    cmp rdi, rsi                ; if current byte is at or past the end
+    jae .break                  ; then break (unsigned compare: these are addresses)
+
+    push rdi                    ; keep cursor, end and count across the call
+    push rsi
+    push rcx
+
+    ; rdi -> current byte
+    call identify_next_token
+    ; ax = id of token
+    ; dx = length of token
+
+    pop rcx
+    pop rsi
+    pop rdi
+
+    ; deal with terminator character (reported as 0 length token)
+    cmp rdx, 0
+    je .token_length0
+    jne .continue
+
+    .token_length0:
+      mov ax, 0xFE00            ; terminator character, stored as 0xFEXX
+      mov al, [rdi]             ; XX = byte of terminator
+      mov edx, 1                ; byte length is 1, so the cursor still advances
+
+    .continue:
+
+    add rdi, rdx                ; current byte + length of token = next unread byte
+
+    mov [TOKEN_TABLE_ADDR + rcx * TOKEN_TABLE_ENTRY_SIZE], rax ; fill next entry
+                                                               ; in token table (no check against TOKEN_TABLE_SIZE)
+    inc rcx                     ; +1 token processed
+    jmp .loop
+  .break:
+    ret                         ; rcx = number of entries written
+
 ; ------------------------------------------------------------------------------
 ; identify_token
 ;
@@ -61,10 +123,10 @@ identify_token:
 
   jmp .unrecognised             ; else unrecognised
 
-  .start_length1
+  .start_length1:
     mov rcx, tokens.length1     ; rcx -> list of known tokens
 
-
.loop_length1
+  .loop_length1:
     cmp rcx, tokens.length2     ; check if rcx still in the bounds of length1 tokens
     jge .unrecognised           ; if not, unrecognised
 
@@ -76,12 +138,12 @@ identify_token:
     add rcx, 3                  ; length of token + length of id
     jmp .loop_length1
 
-  .found_length1
+  .found_length1:
     xor eax, eax                ; make sure rest of rax is zeroed
     mov ax, [rcx + 1]           ; return id of token
     ret
 
-  .start_length2
+  .start_length2:
     mov rcx, tokens.length2     ; rcx -> list of known tokens
 
   .loop_length2:
@@ -151,6 +213,58 @@ identify_token:
     mov ax, UNRECOGNISED_TOKEN_ID
     ret
 
+; ------------------------------------------------------------------------------
+; identify_next_token
+; description:
+;   like identify_token, except the length is found by scanning to the next terminator byte
+;
+; parameters:
+;   rdi -> first byte of token
+;
+; returned:
+;   ax = id of token; the rest of rax is zeroed
+;   dx = length of token in bytes (0 if the first byte is a terminator); the rest of rdx is zeroed
+; ------------------------------------------------------------------------------
+
+identify_next_token:
+  push rdi                      ; keep the token start for identify_token
+
+  mov rsi, rdi                  ; rsi is the current byte
+  xor rdi, rdi                  ; rdi is the length
+  .loop:
+    xor edx, edx                ; clear rdx so only dl is set
+    mov dl, [rsi]               ; dl = byte to look up in the terminator list
+
+    push rsi                    ; rsi and rdi are reused as elemb arguments below
+    push rdi
+    push rdx
+
+    mov rdi, 8                  ; length of terminator list
+    mov rsi, token_terminator_8 ; start of terminator list
+    call elemb                  ; rax = 1 if dl is in the list, else 0
+
+    pop rdx
+    pop rdi
+    pop rsi
+
+    cmp rax, 1                  ; check if the next character is a token terminator
+    je .break                   ; if so, break
+
+    inc rdi                     ; next character
+    inc rsi                     ; next byte of token
+    jmp .loop
+
+  .break:
+    mov rsi, rdi                ; length of token
+
+    pop rdi                     ; rdi -> first byte of token again
+
+    push rsi                    ; keep the length across identify_token
+    call identify_token
+    pop rsi
+    mov rdx, rsi                ; length
+    ret
+
 ; ------------------------------------------------------------------------------
 ; copy_token
 ;
@@ -168,7 +282,6 @@ identify_token:
 
 copy_token:
   .loop:
-
     mov dl, [rdi]               ; move bit to compare to current byte in read buffer
 
     push rdi                    ; push incrementors to call elemb
@@ -190,7 +303,6 @@ copy_token:
 
     inc rdi                     ; read pointer
     inc rsi                     ; write pointer
-
     jmp .loop
 
   .break:
@@ -227,6 +339,7
@@ copy_byte:
 ;
 ; description:
 ;   prints a null-terminated string
+;   saves rax, rdx and rsi on entry and restores them on exit, for ease of debugging
 ;
 ; parameters:
 ;   rsi -> start of null-terminated string
@@ -234,6 +347,9 @@ copy_byte:
 
 print:
   push rdx
+  push rax                      ; al carries the current character
+  push rsi                      ; rsi is advanced while printing
+
   mov edx, 0x3F8
   .loop:
     mov al, [rsi]
@@ -243,6 +359,8 @@ print:
     inc rsi
     jmp .loop
   .done:
+  pop rsi                       ; restore in reverse order of the pushes
+  pop rax
   pop rdx
   ret
 
@@ -276,7 +394,7 @@ halt:
 ; ------------------------------------------------------------------------------
 
 elemb:
-  .loop
+  .loop:
     cmp rdi, 0                  ; check if remaining length 0
     je .not_found               ; if so, break; dl not an element of list
 
@@ -289,11 +407,11 @@ elemb:
 
     jmp .loop
 
-  .not_found
+  .not_found:
     xor eax, eax                ; return 0; dl not an element of list
     ret
 
-  .found
+  .found:
     xor eax, eax
     mov rax, 1                  ; return 1; dl an element of list
     ret
@@ -356,6 +474,9 @@ run_tests:
   call clear_test_arena
   call test_identify_token
 
+  call clear_test_arena         ; reset the arena before the next test
+  call test_identify_next_token
+
   ret
   .msg db "running test suite...", 0x0D, 0x0A, 0x00
 
@@ -609,12 +730,106 @@ test_identify_token:
   ret
   .msg db "test_identify_token...", 0x00
 
+; ------------------------------------------------------------------------------
+; test_identify_next_token
+;
+; description:
+;   tests identify_next_token: writes a token into the test arena, checks the id in ax
+; ------------------------------------------------------------------------------
+
+test_identify_next_token:
+  mov rsi, .msg
+  call print
+
+  ; length1 token that exists
+  mov word [TEST_ARENA_ADDR], "* "
+  mov rdi, TEST_ARENA_ADDR
+  call identify_next_token
+  cmp ax, 0x0064
+  jne .fail
+
+  ; length1 token that doesn't exist -- NOTE(review): first byte is a space (a terminator), so this looks like the zero-length path; confirm
+  mov word [TEST_ARENA_ADDR], " "
+  mov rdi, TEST_ARENA_ADDR
+  call identify_next_token
+  cmp ax, 0xFFFF                ; UNRECOGNISED_TOKEN_ID
+  jne .fail
+
+  ; length2 token that exists
+  mov dword [TEST_ARENA_ADDR], "sp "
+  mov rdi, TEST_ARENA_ADDR
+  call identify_next_token
+  cmp ax, 0x0026
+  jne .fail
+
+  ; length2 token that doesn't exist
+  mov dword [TEST_ARENA_ADDR], "QQ "
+  mov rdi, TEST_ARENA_ADDR
+  call identify_next_token
+  cmp ax, 0xFFFF                ; UNRECOGNISED_TOKEN_ID
+  jne .fail
+
+  ; length3 token that exists
+  mov dword [TEST_ARENA_ADDR], "rax "
+  mov rdi, TEST_ARENA_ADDR
+  call identify_next_token
+  cmp ax, 0x0000
+  jne .fail
+
+  ; length3 token that exists
+  mov dword [TEST_ARENA_ADDR], "cr0 "
+  mov rdi, TEST_ARENA_ADDR
+  call identify_next_token
+  cmp ax, 0x004A
+  jne .fail
+
+  ; length3 token that doesn't exist
+  mov dword [TEST_ARENA_ADDR], "r16 "
+  mov rdi, TEST_ARENA_ADDR
+  call identify_next_token
+  cmp ax, 0xFFFF                ; UNRECOGNISED_TOKEN_ID
+  jne .fail
+
+  ; length4 token that exists
+  mov dword [TEST_ARENA_ADDR], "r10d"
+  mov byte [TEST_ARENA_ADDR + 4], " " ; terminator after the 4-byte token
+  mov rdi, TEST_ARENA_ADDR
+  call identify_next_token
+  cmp ax, 0x001A
+  jne .fail
+
+  ; length4 token that exists
+  mov dword [TEST_ARENA_ADDR], "r15b"
+  mov byte [TEST_ARENA_ADDR + 4], " " ; terminator after the 4-byte token
+  mov rdi, TEST_ARENA_ADDR
+  call identify_next_token
+  cmp ax, 0x003F
+  jne .fail
+
+  ; length4 token that doesn't exist
+  mov dword [TEST_ARENA_ADDR], "r15q"
+  mov byte [TEST_ARENA_ADDR + 4], " " ; terminator after the 4-byte token
+  mov rdi, TEST_ARENA_ADDR
+  call identify_next_token
+  cmp ax, 0xFFFF                ; UNRECOGNISED_TOKEN_ID
+  jne .fail
+
+  .pass:
+    mov rsi, msg_pass
+    call print
+    ret
+  .fail:
+    mov rsi, msg_fail
+    call print
+    ret
+  .msg db "test_identify_next_token...", 0x00
+
 ; ------------------------------------------------------------------------------
 ; data
 ; ------------------------------------------------------------------------------
 
 tokens:
-  .length1
+  .length1:
     db "["
     dw 0x0051
     db "]"
@@ -627,7 +842,7 @@ tokens:
     dw 0x0064
     db "/"
     dw 0x0065
-  .length2
+  .length2:
     db "r8"
     dw 0x0008
     db "r9"
@@ -682,7 +897,7 @@ tokens:
     dw 0x005F
     db "jl"
     dw 0x0061
-  .length3
+  .length3:
     db "rax"
     dw 0x0000
     db "rbx"
@@ -781,7 +996,7 @@ tokens:
     dw 0x005E
     db "jle"
     dw 0x0060
-  .length4
+  .length4:
     db "r10d"
     dw 0x001A
     db "r11d"
@@ -822,20 +1037,35 @@ tokens:
     dw 0x0050
     db "call"
     dw 0x0059
-  .length5
-  .end
+  .length5:
+  .end:
 
 msg_welcome db "Welcome to Twasm", 0x0D, 0x0A, 0x00
 msg_halt db "halted.", 0x0D, 0x0A, 0x00
-msg_pass db "passed.", 0x0D, 0x0A, 0x00
-msg_fail db "failed.", 0x0D, 0x0A, 0x00
+msg_pass:
+  db 0x0D, 0x0A                 ; CR LF before the verdict
+  times (TEST_LINE_LENGTH + .start - .end) db " " ; right align: pad with TEST_LINE_LENGTH minus the verdict length
+  .start db "passed."
+  .end db 0x0D, 0x0A, 0x00      ; CR LF and the null terminator print stops at
+msg_fail:
+  db 0x0D, 0x0A                 ; CR LF before the verdict
+  times (TEST_LINE_LENGTH + .start - .end) db " " ; right align, same padding rule as msg_pass
+  .start db "failed."
+  .end db 0x0D, 0x0A, 0x00      ; CR LF and the null terminator print stops at
 
 test_byte db "Q" ; unterminated, just a byte chillin
 test_token_null db "TestTokn", 0x00 ; followed by null terminator. Quad word
 test_token_space db "TestTokn " ; followed by space. Quad word
-test_elemb_0 ; [This Page Intentionally Left Blank]
+test_elemb_0: ; [This Page Intentionally Left Blank]
 test_elemb_5 db 0x54, 0x00, 0x21, 0x20, 0x34
 
-token_terminator_8 db 0x00, " ", 0x0A, 0x0D, 0x00, 0x00, 0x00, 0x00
+token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
 
 debug_string db "debug_string", 0x0D, 0x0A, 0x00
+
+program:
+  db "xor eax, eax", 0x0D, 0x0A
+  db "inc rax", 0x0D, 0x0A
+  db "hlt", 0x0D, 0x0A
+  db 0x00                       ; null terminator, not counted in .size
+  .size dq $ - program - 1      ; byte count without the trailing 0x00; qword because start loads it with a 64-bit mov