From 0ee8ff7914683f492c04e12bbec2af8cc2bb4125 Mon Sep 17 00:00:00 2001 From: andromeda Date: Thu, 12 Mar 2026 23:03:29 +0100 Subject: [PATCH] some major architecture changes --- twasm/README.md | 85 ++++- twasm/asm/main.asm | 827 +++++++++++++++++++++----------------------- twasm/asm/tests.asm | 202 ----------- 3 files changed, 459 insertions(+), 655 deletions(-) diff --git a/twasm/README.md b/twasm/README.md index d6d2ec5..c2cc263 100644 --- a/twasm/README.md +++ b/twasm/README.md @@ -12,6 +12,70 @@ I want to compile Bootler and Twasm with the Twasm assembler - [opcodes,ModR/M,SIB](http://ref.x86asm.net/coder64.html) (no secure site available) - [calling conventions](https://wiki.osdev.org/Calling_Conventions); I try to use System V +### tokeniser + +whitespace is ignored for the sake of readability; it can go between pretty much anything + +``` +------------------------ +tokeniser +------------------------ +byte(s) -> next byte(s) +------------------------ +Newline -> Newline + -> Komment + -> Operator + -> Directive + +Komment -> Newline + +Operator -> Newline + -> Komment + -> Operand + +Operand -> Newline + -> Komment + -> Comma + +Comma -> Operand + +Directive -> Newline + -> Komment + -> Operator +------------------------ +``` + +not yet implemented: + +``` +------------------------ +operand parser +------------------------ +byte(s) -> next byte(s) +------------------------ +START -> '[' + -> Register + -> Constant + +'[' -> Register + -> Constant + +']' -> END + +Register -> IF #[, ']' + -> Operator + +Constant -> IF #[, ']' + -> Operator + +Operator -> IF NOT #R, Register + -> Constant +------------------------ +:R: = whether a register has been found +:[: = whether a '[' has been found +------------------------ +``` + ### memory map ``` @@ -50,15 +114,15 @@ each token gets loaded into the token table with the following form: ### internal data structures -#### `tokens.by_nameX` +#### `tokens.[operators|registers]` -contains all tokens of that length 
followed by their ID. For some non-empty `tokens.by_nameX`, it is true that `tokens.by_name - tokens.by_nameX` is the size in bytes of `tokens.by_nameX`. +contains tokens by their type. Intended to be searched by token name to get the token's ID. each entry is in the following form: ``` +----------+--------------------------------+ -|[2 bytes] | 8 * token_length - 1 0 | +| 47 32 | 31 0 | +----------+--------------------------------+ | token ID | string without null terminator | +----------+--------------------------------+ @@ -68,19 +132,16 @@ each entry is in the following form: example implementation: ```nasm -tokens: - .by_name1: - db "+" - dw 0x0062 - db "-" - dw 0x0063 - .by_name2: - db "r8" +tokens: + .registers: + dd "r8" dw 0x0008 .by_name3: ; this is required for futureproofness; the caller can use this to - ; find the size of tokens.by_name2 + ; find the size of tokens.registers ``` +note that the name field is only 4 bytes wide (`dd`), so tokens longer than 4 bytes do not fit this entry form :/ + #### `tokens.by_id` contains some tokens with their metadata. Some tokens have embedded information (`0x10XX` for instance). Those will not have entries in this table, being handled instead inside the assemble function itself. 
diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index b2961b8..22a1084 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -22,6 +22,14 @@ UNRECOGNISED_ID_OPCODE equ 0x90 ; opcode of an unrecognised id (NOP) TEST_LINE_LENGTH equ 80 ; right border of test suite results +; flags for expected values in tokeniser +E_COMMENT equ 1 << 0 +E_NEWLINE equ 1 << 1 +E_WHITESPACE equ 1 << 2 +E_COMMA equ 1 << 3 +E_OPERATOR equ 1 << 4 +E_OPERAND equ 1 << 5 + [bits 64] [org LOAD_ADDR] [default abs] ; TODO see if I actually need to do this @@ -455,256 +463,201 @@ get_reg_bits: ; ------------------------------------------------------------------------------ tokenise: - add rsi, rdi ; last byte of program - xor ecx, ecx ; number of tokens processed + ; rdi -> current byte of program + add rsi, rdi ; rsi -> last byte of program + xor eax, eax ; rax = number of tokens processed + xor edx, edx ; dl = current byte of program + .loop: cmp rdi, rsi ; if current byte greater than last byte jg .break ; then break - push rdi - push rsi - push rcx + mov dl, [rdi] ; dl = current byte - ; rdi -> current byte - call identify_next_token - ; ax = id of token - ; dx = length of token + cmp dl, ";" ; if current byte is the start of a comment + je .comment ; then handle the comment - pop rcx - pop rsi - pop rdi + cmp dl, 0x0A ; if current byte is the end of a line + je .newline_mk_flags ; then reset relevant flags - ; deal with terminator character (reported as 0 length token) - cmp rdx, 0 - je .token_length0 - jne .continue0 - - .token_length0: - mov ax, 0xFE00 ; terminator character - mov al, [rdi] ; byte of terminator - mov edx, 1 ; byte length is 1 - - .continue0: - add rdi, rdx ; current byte + length of token = next unread byte - - mov [TOKEN_TABLE_ADDR + rcx * TOKEN_TABLE_ENTRY_SIZE], ax ; fill next entry - ; in token table - - ; TODO fix undefined behaviour when open brackets and closed brackets aren't - ; correctly paired or have too much distance between them - cmp ax, 0x0051 ; 
check if read token is an open bracket - je .open_bracket ; if so, handle it - jne .continue_open_bracket ; if not, continue - - .open_bracket: - ; TODO make brackets able to hold more - mov [.data_open_bracket], cl ; record which entry the open bracket is at - - .continue_open_bracket: - cmp ax, 0x0052 ; check if read token is a closing bracket - je .close_bracket ; if so, handle it - jne .continue_close_bracket ; if not, continue - - .close_bracket: - ; rewrite open bracket token entry with a filled out one - push rcx - - mov dl, [.data_open_bracket] - sub cl, dl - mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl - mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10 - - pop rcx - - .continue_close_bracket: - inc rcx ; +1 token processed - jmp .loop - .break: - mov rax, rcx - ret - - .data_open_bracket db 0x00 ; represents the token # of the latest open bracket - -; ------------------------------------------------------------------------------ -; identify_token -; -; description: -; returns the id of a given token. If there are multiple ways to represent a -; given token, like the open-bracket, it returns the one that doesn't require -; information about the surrounding tokens, because it has no such information. -; In other words, if it isn't in the `tokens` data structure, this function -; doesn't see it. If the first byte of the token points to a terminator -; byte, this function returns it as an unrecognised token. 
-; -; parameters: -; rdi -> first byte of token -; rsi = size of token in bytes -; -; returned: -; ax = id of token; the rest of rax is zeroed -; ------------------------------------------------------------------------------ - -identify_token: - cmp rsi, 1 ; if the token has length 1 - je .start_length1 ; then enter the length 1 loop - - cmp rsi, 2 ; if the token has length 2 - je .start_length2 ; then enter the length 2 loop - - cmp rsi, 3 ; if the token has length 3 - je .start_length3 ; then enter the length 3 loop - - cmp rsi, 4 ; if the token has length 4 - je .start_length4 ; then enter the length 4 loop - - jmp .unrecognised ; else unrecognised - - ; length1 - .start_length1: - mov rcx, tokens.by_name_1 ; rcx -> list of known tokens - - .loop_length1: - cmp rcx, tokens.by_name_2 ; check if rcx still in the bounds of length1 tokens - jge .unrecognised ; if not, unrecognised - - mov r10b, [rcx] ; known token - mov r11b, [rdi] ; token - cmp r10b, r11b ; if known token matches token - je .found_length1 ; exit loop - - add rcx, 3 ; length of token + length of id - jmp .loop_length1 - - .found_length1: - xor eax, eax ; make sure rest of rax is zeroed - mov ax, [rcx + 1] ; return id of token - ret - - ; length2 - .start_length2: - mov rcx, tokens.by_name_2 ; rcx -> list of known tokens - - .loop_length2: - cmp rcx, tokens.by_name_3 ; check if rcx still in the bounds of length2 tokens - jge .unrecognised ; if not, unrecognised - - mov r10w, [rcx] ; current entry in known tokens - mov r11w, [rdi] ; token - cmp r10w, r11w ; if current entry matches token, - je .found_length2 ; exit loop - - add rcx, 4 ; length of token + length of id - jmp .loop_length2 - - .found_length2: - xor eax, eax ; make sure rest of rax is zeroed - mov ax, [rcx + 2] ; return id of token - ret - - ; length3 - .start_length3: - mov rcx, tokens.by_name_3 ; rcx -> list of known tokens - - .loop_length3: - cmp rcx, tokens.by_name_4 ; check if rcx still in bounds of length3 tokens - jge 
.unrecognised ; if not, unrecognised - - ; TODO make this safe (it overreaches 1 byte) - mov r10d, [rcx] ; known token + next byte - mov r11d, [rdi] ; token + next byte - - and r10d, 0x00FFFFFF ; mask for just the token - and r11d, 0x00FFFFFF - - cmp r10d, r11d ; if known token matches token, - je .found_length3 ; exit loop - - add rcx, 5 ; length of token + length of id - jmp .loop_length3 - - .found_length3: - xor rax, rax ; zero rax - mov ax, [rcx + 3] ; return id of token - ret - - ; length4 - .start_length4: - mov rcx, tokens.by_name_4 ; rcx -> list of known tokens - - .loop_length4: - cmp rcx, tokens.by_name_5 ; check if rcx still in bounds of length3 tokens - jge .unrecognised ; if not, unrecognised - - mov r10d, [rcx] ; known token - mov r11d, [rdi] ; token - cmp r10d, r11d ; if known token matches token, - je .found_length4 ; exit loop - - add rcx, 6 ; length of token + length of id - jmp .loop_length4 - - .found_length4: - xor rax, rax ; zero rax - mov ax, [rcx + 4] ; return id of token - ret - - .unrecognised: - xor eax, eax - mov ax, UNRECOGNISED_TOKEN_ID - ret - -; ------------------------------------------------------------------------------ -; identify_next_token -; description: -; like identify_token, except it automatically finds the length. If the first -; byte of the token points to a terminator byte, it returns a length of 0. 
-; -; parameters: -; rdi -> first byte of token -; -; returned: -; ax = id of token; the rest of rax is zeroed -; dx = length of token in bytes; the rest of rdx is zeroed -; ------------------------------------------------------------------------------ - -identify_next_token: - push rdi - - mov rsi, rdi ; rsi is the current byte - xor rdi, rdi ; rdi is the length - .loop: - xor edx, edx - mov dl, [rsi] + cmp dl, "," ; if current byte is a comma + je .comma ; then handle the comma push rsi push rdi + push rax push rdx - - mov rdi, 8 ; length of terminator list - mov rsi, token_terminator_8 ; start of terminator list + mov rsi, whitespace_2 ; rsi -> list of whitespace (ignored) bytes + mov rdi, 2 ; rdi = size of list in bytes + ; dl = current byte call elemb - + ; al = 0 if not whitespace, 1 if whitespace + cmp al, 1 ; check if current byte is whitespace pop rdx + pop rax pop rdi pop rsi + je .skip_byte_whitespace - cmp rax, 1 ; check if the next character is a token terminator - je .break ; if so, break + test byte [.expecting], E_OPERATOR ; check if an operator is expected + jnz .operator ; if so, handle it + jmp .operand ; otherwise, handle as an operand - inc rdi ; next character - inc rsi ; next byte of token + .comment: + push rsi + mov rsi, .found + call print.debug + mov rsi, .msg_comment + call print + pop rsi + test byte [.expecting], E_COMMENT ; make sure a comment is expected + jz .unexpected_comment ; if not, error + .comment_loop: + ; TODO range check rdi + mov dl, [rdi] ; dl = current byte + + cmp dl, 0x0A ; if current byte is a newline + je .comment_break ; then break + + inc rdi ; point to next unread byte + jmp .comment_loop + .comment_break: + jmp .loop + + .skip_byte_whitespace: + push rsi + mov rsi, .found + call print.debug + mov rsi, .msg_whitespace + call print + pop rsi + + test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected + jz .unexpected_whitespace ; if not, error + inc rdi + jmp .loop ; else, loop + + .comma: ; 
found comma + push rsi + mov rsi, .found + call print.debug + mov rsi, .msg_comma + call print + pop rsi + + test byte [.expecting], E_COMMA ; make sure a comma was expected + jz .unexpected_comma ; if not, error + inc rdi + mov byte [.expecting], E_WHITESPACE | E_OPERAND ; else, make operand expected + jmp .loop ; and loop + + .newline_mk_flags: + push rsi + mov rsi, .found + call print.debug + mov rsi, .msg_newline + call print + pop rsi + + test byte [.expecting], E_NEWLINE ; make sure a newline was expected + jz .unexpected_newline ; if not, error + + mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR ; new line: back to the start-of-line expectations + + inc rdi + jmp .loop + + .operator: + push rsi + mov rsi, .found + call print.debug + mov rsi, .msg_operator + call print + pop rsi + .operator_loop: ; TODO range check rdi (0x00 / end of program does not stop this scan) + mov dl, [rdi] ; next byte + + cmp dl, " " + je .operator_break + cmp dl, 0x0A + je .operator_break + cmp dl, ";" + je .operator_break + + inc rdi ; inc byte counter + jmp .operator_loop ; and loop + .operator_break: + mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND ; operator consumed: operand may follow + jmp .loop + + .operand: + push rsi + mov rsi, .found + call print.debug + mov rsi, .msg_operand + call print + pop rsi + test byte [.expecting], E_OPERAND ; make sure an operand was expected + jz .unexpected_operand ; if not, error + .operand_loop: + mov dl, [rdi] ; dl = current byte + cmp dl, "," + je .operand_break + cmp dl, 0x0A + je .operand_break + cmp dl, 0x00 + je .operand_break + inc rdi + jmp .operand_loop + .operand_break: + mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA ; operand consumed: comma may follow jmp .loop .break: - mov rsi, rdi ; length of token + ret - pop rdi + ; state - push rsi - call identify_token - pop rsi - mov rdx, rsi ; length - ret + + .expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR + + .unexpected_whitespace: + mov rsi, .err_unexpected + call print.error + mov rsi, .msg_whitespace + call print + jmp halt + .unexpected_comment: + mov rsi, .err_unexpected + call print.error + mov rsi, .msg_comment 
+ call print + jmp halt + .unexpected_newline: + mov rsi, .err_unexpected + call print.error + mov rsi, .msg_newline + call print + jmp halt + .unexpected_comma: + mov rsi, .err_unexpected + call print.error + mov rsi, .msg_comma + call print + jmp halt + .unexpected_operand: + mov rsi, .err_unexpected + call print.error + mov rsi, .msg_operand + call print + jmp halt + .err_unexpected db "unexpected ", 0x00 + .found db "found ", 0x00 + .msg_whitespace db "whitespace.", 0x0A, 0x00 + .msg_comment db "comment.", 0x0A, 0x00 + .msg_newline db "newline.", 0x0A, 0x00 + .msg_comma db "comma.", 0x0A, 0x00 + .msg_operator db "operator.", 0x0A, 0x00 + .msg_operand db "operand.", 0x0A, 0x00 ; ------------------------------------------------------------------------------ ; utilities @@ -789,7 +742,7 @@ halt: ; elemb ; ; description: -; checks if given byte is element of the specified list +; checks if given byte is element of the specified list. ; ; parameters: ; rdi = size of list @@ -872,216 +825,9 @@ clear_output_arena: ; data ; ------------------------------------------------------------------------------ + + tokens: - .by_name_1: - db "[" - dw 0x0051 - db "]" - dw 0x0052 - db "+" - dw 0x0062 - db "-" - dw 0x0063 - db "*" - dw 0x0064 - db "/" - dw 0x0065 - .by_name_2: - db "r8" - dw 0x0008 - db "r9" - dw 0x0009 - db "ax" - dw 0x0020 - db "bx" - dw 0x0021 - db "cx" - dw 0x0022 - db "dx" - dw 0x0023 - db "si" - dw 0x0024 - db "di" - dw 0x0025 - db "sp" - dw 0x0026 - db "bp" - dw 0x0027 - db "al" - dw 0x0030 - db "bl" - dw 0x0031 - db "cl" - dw 0x0032 - db "dl" - dw 0x0033 - db "ah" - dw 0x0040 - db "bh" - dw 0x0041 - db "ch" - dw 0x0042 - db "dh" - dw 0x0043 - db "cs" - dw 0x0044 - db "ds" - dw 0x0045 - db "es" - dw 0x0046 - db "fs" - dw 0x0047 - db "gs" - dw 0x0048 - db "ss" - dw 0x0049 - db "je" - dw 0x005C - db "jg" - dw 0x005F - db "jl" - dw 0x0061 - .by_name_3: - db "rax" - dw 0x0000 - db "rbx" - dw 0x0001 - db "rcx" - dw 0x0002 - db "rdx" - dw 0x0003 - db "rsi" - dw 
0x0004 - db "rdi" - dw 0x0005 - db "rsp" - dw 0x0006 - db "rbp" - dw 0x0007 - db "r10" - dw 0x000A - db "r11" - dw 0x000B - db "r12" - dw 0x000C - db "r13" - dw 0x000D - db "r14" - dw 0x000E - db "r15" - dw 0x000F - db "eax" - dw 0x0010 - db "ebx" - dw 0x0011 - db "ecx" - dw 0x0012 - db "edx" - dw 0x0013 - db "esi" - dw 0x0014 - db "edi" - dw 0x0015 - db "esp" - dw 0x0016 - db "ebp" - dw 0x0017 - db "r8d" - dw 0x0018 - db "r9d" - dw 0x0019 - db "r8w" - dw 0x0028 - db "r9w" - dw 0x0029 - db "sil" - dw 0x0034 - db "dil" - dw 0x0035 - db "spl" - dw 0x0036 - db "bpl" - dw 0x0037 - db "r8b" - dw 0x0038 - db "r9b" - dw 0x0039 - db "cr0" - dw 0x004A - db "cr2" - dw 0x004B - db "cr3" - dw 0x004C - db "cr4" - dw 0x004D - db "cr8" - dw 0x004E - db "hlt" - dw 0x004F - db "xor" - dw 0x0053 - db "inc" - dw 0x0054 - db "dec" - dw 0x0055 - db "mov" - dw 0x0056 - db "add" - dw 0x0057 - db "sub" - dw 0x0058 - db "ret" - dw 0x005A - db "cmp" - dw 0x005B - db "jne" - dw 0x005D - db "jge" - dw 0x005E - db "jle" - dw 0x0060 - .by_name_4: - db "r10d" - dw 0x001A - db "r11d" - dw 0x001B - db "r12d" - dw 0x001C - db "r13d" - dw 0x001D - db "r14d" - dw 0x001E - db "r15d" - dw 0x001F - db "r10w" - dw 0x002A - db "r11w" - dw 0x002B - db "r12w" - dw 0x002C - db "r13w" - dw 0x002D - db "r14w" - dw 0x002E - db "r15w" - dw 0x002F - db "r10b" - dw 0x003A - db "r11b" - dw 0x003B - db "r12b" - dw 0x003C - db "r13b" - dw 0x003D - db "r14b" - dw 0x003E - db "r15b" - dw 0x003F - db "int3" - dw 0x0050 - db "call" - dw 0x0059 - .by_name_5: .by_id: dw 0x0010 ; eax db 0x02 ; type: register @@ -1114,6 +860,202 @@ tokens: db 0x01 ; type: operator db 0x00 ; # operands .by_id_end: + .operators: + dd "je" + dw 0x005C + dd "jg" + dw 0x005F + dd "jl" + dw 0x0061 + dd "hlt" + dw 0x004F + dd "xor" + dw 0x0053 + dd "inc" + dw 0x0054 + dd "dec" + dw 0x0055 + dd "mov" + dw 0x0056 + dd "add" + dw 0x0057 + dd "sub" + dw 0x0058 + dd "ret" + dw 0x005A + dd "cmp" + dw 0x005B + dd "jne" + dw 0x005D + dd "jge" + dw 0x005E + 
dd "jle" + dw 0x0060 + dd "int3" + dw 0x0050 + dd "call" + dw 0x0059 + .operators_end: + .registers: + dd "r8" + dw 0x0008 + dd "r9" + dw 0x0009 + dd "ax" + dw 0x0020 + dd "bx" + dw 0x0021 + dd "cx" + dw 0x0022 + dd "dx" + dw 0x0023 + dd "si" + dw 0x0024 + dd "di" + dw 0x0025 + dd "sp" + dw 0x0026 + dd "bp" + dw 0x0027 + dd "al" + dw 0x0030 + dd "bl" + dw 0x0031 + dd "cl" + dw 0x0032 + dd "dl" + dw 0x0033 + dd "ah" + dw 0x0040 + dd "bh" + dw 0x0041 + dd "ch" + dw 0x0042 + dd "dh" + dw 0x0043 + dd "cs" + dw 0x0044 + dd "ds" + dw 0x0045 + dd "es" + dw 0x0046 + dd "fs" + dw 0x0047 + dd "gs" + dw 0x0048 + dd "ss" + dw 0x0049 + dd "rax" + dw 0x0000 + dd "rbx" + dw 0x0001 + dd "rcx" + dw 0x0002 + dd "rdx" + dw 0x0003 + dd "rsi" + dw 0x0004 + dd "rdi" + dw 0x0005 + dd "rsp" + dw 0x0006 + dd "rbp" + dw 0x0007 + dd "r10" + dw 0x000A + dd "r11" + dw 0x000B + dd "r12" + dw 0x000C + dd "r13" + dw 0x000D + dd "r14" + dw 0x000E + dd "r15" + dw 0x000F + dd "eax" + dw 0x0010 + dd "ebx" + dw 0x0011 + dd "ecx" + dw 0x0012 + dd "edx" + dw 0x0013 + dd "esi" + dw 0x0014 + dd "edi" + dw 0x0015 + dd "esp" + dw 0x0016 + dd "ebp" + dw 0x0017 + dd "r8d" + dw 0x0018 + dd "r9d" + dw 0x0019 + dd "r8w" + dw 0x0028 + dd "r9w" + dw 0x0029 + dd "sil" + dw 0x0034 + dd "dil" + dw 0x0035 + dd "spl" + dw 0x0036 + dd "bpl" + dw 0x0037 + dd "r8b" + dw 0x0038 + dd "r9b" + dw 0x0039 + dd "cr0" + dw 0x004A + dd "cr2" + dw 0x004B + dd "cr3" + dw 0x004C + dd "cr4" + dw 0x004D + dd "cr8" + dw 0x004E + dd "r10d" + dw 0x001A + dd "r11d" + dw 0x001B + dd "r12d" + dw 0x001C + dd "r13d" + dw 0x001D + dd "r14d" + dw 0x001E + dd "r15d" + dw 0x001F + dd "r10w" + dw 0x002A + dd "r11w" + dw 0x002B + dd "r12w" + dw 0x002C + dd "r13w" + dw 0x002D + dd "r14w" + dw 0x002E + dd "r15w" + dw 0x002F + dd "r10b" + dw 0x003A + dd "r11b" + dw 0x003B + dd "r12b" + dw 0x003C + dd "r13b" + dw 0x003D + dd "r14b" + dw 0x003E + dd "r15b" + dw 0x003F + .registers_end: opcodes: .by_id: @@ -1139,11 +1081,14 @@ msg_halt db "halted.", 0x0A, 
0x00 token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00 +whitespace_2 db " ", 0x0D + ; test program program: db "xor eax, eax", 0x0A - db "inc rax", 0x0A + db "inc rax ; inline comment", 0x0A + db "; one line comment", 0x0A db "mov [ rax ], rdx", 0x0A - db "hlt", 0x0A + db "hlt" db 0x00 ; just for the sake of being able to print it, I made it a string - .size db $ - program - 1 + .size db $ - program diff --git a/twasm/asm/tests.asm b/twasm/asm/tests.asm index fc9fcc7..6fffb84 100644 --- a/twasm/asm/tests.asm +++ b/twasm/asm/tests.asm @@ -16,12 +16,6 @@ run_tests: call clear_test_arena call test_elemb - call clear_test_arena - call test_identify_token - - call clear_test_arena - call test_identify_next_token - call clear_test_arena call test_get_tte_type @@ -101,202 +95,6 @@ test_elemb: ret .msg db "test_elemb...", 0x00 -; ------------------------------------------------------------------------------ -; test_identify_token -; -; description: -; tests identify_token described functionality -; ------------------------------------------------------------------------------ - -test_identify_token: - mov rsi, .msg - call print.test - - ; length1 token that exists - mov byte [TEST_ARENA_ADDR], "*" - mov rdi, TEST_ARENA_ADDR - mov rsi, 1 - call identify_token - cmp ax, 0x0064 - jne .fail - - ; length1 token that doesn't exist - mov byte [TEST_ARENA_ADDR], " " - mov rdi, TEST_ARENA_ADDR - mov rsi, 1 - call identify_token - cmp ax, 0xFFFF - jne .fail - - ; length2 token that exists - mov word [TEST_ARENA_ADDR], "sp" - mov rdi, TEST_ARENA_ADDR - mov rsi, 2 - call identify_token - cmp ax, 0x0026 - jne .fail - - ; length2 token that doesn't exist - mov word [TEST_ARENA_ADDR], "QQ" - mov rdi, TEST_ARENA_ADDR - mov rsi, 2 - call identify_token - cmp ax, 0xFFFF - jne .fail - - ; length3 token that exists - mov dword [TEST_ARENA_ADDR], "rax" - mov rdi, TEST_ARENA_ADDR - mov rsi, 3 - call identify_token - cmp ax, 0x0000 - jne .fail - - ; length3 token that exists - 
mov dword [TEST_ARENA_ADDR], "cr0" - mov rdi, TEST_ARENA_ADDR - mov rsi, 3 - call identify_token - cmp ax, 0x004A - jne .fail - - ; length3 token that doesn't exist - mov dword [TEST_ARENA_ADDR], "r16" - mov rdi, TEST_ARENA_ADDR - mov rsi, 3 - call identify_token - cmp ax, 0xFFFF - jne .fail - - ; length4 token that exists - mov dword [TEST_ARENA_ADDR], "r10d" - mov rdi, TEST_ARENA_ADDR - mov rsi, 4 - call identify_token - cmp ax, 0x001A - jne .fail - - ; length4 token that exists - mov dword [TEST_ARENA_ADDR], "r15b" - mov rdi, TEST_ARENA_ADDR - mov rsi, 4 - call identify_token - cmp ax, 0x003F - jne .fail - - ; length4 token that doesn't exist - mov dword [TEST_ARENA_ADDR], "r15q" - mov rdi, TEST_ARENA_ADDR - mov rsi, 4 - call identify_token - cmp ax, 0xFFFF - jne .fail - - .pass: - mov rsi, msg_pass - call print - ret - .fail: - mov rsi, msg_fail - call print - ret - .msg db "test_identify_token...", 0x00 - -; ------------------------------------------------------------------------------ -; test_identify_next_token -; -; description: -; tests identify_next_token described functionality -; ------------------------------------------------------------------------------ - -test_identify_next_token: - mov rsi, .msg - call print.test - - ; length1 token that exists - mov word [TEST_ARENA_ADDR], "* " - mov rdi, TEST_ARENA_ADDR - call identify_next_token - cmp ax, 0x0064 - jne .fail - - ; length1 token that doesn't exist - mov word [TEST_ARENA_ADDR], " " - mov rdi, TEST_ARENA_ADDR - call identify_next_token - cmp ax, 0xFFFF - jne .fail - - ; length2 token that exists - mov dword [TEST_ARENA_ADDR], "sp " - mov rdi, TEST_ARENA_ADDR - call identify_next_token - cmp ax, 0x0026 - jne .fail - - ; length2 token that doesn't exist - mov dword [TEST_ARENA_ADDR], "QQ " - mov rdi, TEST_ARENA_ADDR - call identify_next_token - cmp ax, 0xFFFF - jne .fail - - ; length3 token that exists - mov dword [TEST_ARENA_ADDR], "rax " - mov rdi, TEST_ARENA_ADDR - call identify_next_token - cmp 
ax, 0x0000 - jne .fail - - ; length3 token that exists - mov dword [TEST_ARENA_ADDR], "cr0 " - mov rdi, TEST_ARENA_ADDR - call identify_next_token - cmp ax, 0x004A - jne .fail - - ; length3 token that doesn't exist - mov dword [TEST_ARENA_ADDR], "r16 " - mov rdi, TEST_ARENA_ADDR - call identify_next_token - cmp ax, 0xFFFF - jne .fail - - ; length4 token that exists - mov dword [TEST_ARENA_ADDR], "r10d" - mov byte [TEST_ARENA_ADDR + 4], " " - mov rdi, TEST_ARENA_ADDR - call identify_next_token - cmp ax, 0x001A - jne .fail - - ; length4 token that exists - mov dword [TEST_ARENA_ADDR], "r15b" - mov byte [TEST_ARENA_ADDR + 4], " " - mov rdi, TEST_ARENA_ADDR - call identify_next_token - cmp ax, 0x003F - jne .fail - - ; length4 token that doesn't exist - mov dword [TEST_ARENA_ADDR], "r15q" - mov byte [TEST_ARENA_ADDR + 4], " " - mov rdi, TEST_ARENA_ADDR - call identify_next_token - cmp ax, 0xFFFF - jne .fail - - .pass: - mov rsi, msg_pass - call print - ret - .fail: - mov rsi, msg_fail - call print - ret - .msg db "test_identify_next_token...", 0x00 - - ; ------------------------------------------------------------------------------ ; test_get_tte_type ;