some major architecture changes

This commit is contained in:
andromeda
2026-03-12 23:03:29 +01:00
parent bc19d760b9
commit 0ee8ff7914
3 changed files with 459 additions and 655 deletions

View File

@@ -12,6 +12,70 @@ I want to compile Bootler and Twasm with the Twasm assembler
- [opcodes,ModR/M,SIB](http://ref.x86asm.net/coder64.html) (no secure site available)
- [calling conventions](https://wiki.osdev.org/Calling_Conventions); I try to use System V
### tokeniser
whitespace is ignored for the sake of readability; it can go between pretty much anything
```
------------------------
tokeniser
------------------------
byte(s) -> next byte(s)
------------------------
Newline -> Newline
-> Komment
-> Operator
-> Directive
Komment -> Newline
Operator -> Newline
-> Komment
-> Operand
Operand -> Newline
-> Komment
-> Comma
Comma -> Operand
Directive -> Newline
-> Komment
-> Operator
------------------------
```
not yet implemented:
```
------------------------
operand parser
------------------------
byte(s) -> next byte(s)
------------------------
START -> '['
-> Register
-> Constant
'[' -> Register
-> Constant
']' -> END
Register -> IF #[, ']'
-> Operator
Constant -> IF #[, ']'
-> Operator
Operator -> IF NOT #R, Register
-> Constant
------------------------
#R = whether a register has been found
#[ = whether a '[' has been found
------------------------
```
### memory map
```
@@ -50,15 +114,15 @@ each token gets loaded into the token table with the following form:
### internal data structures
#### `tokens.by_nameX`
#### `tokens.[operators|registers]`
contains all tokens of that length followed by their ID. For some non-empty `tokens.by_nameX`, it is true that `tokens.by_name<X+1> - tokens.by_nameX` is the size in bytes of `tokens.by_nameX`.
contains tokens by their type. Intended to be searched by token name to get the token's ID.
each entry is in the following form:
```
+----------+--------------------------------+
|[2 bytes] | 8 * token_length - 1 0 |
| 47 32 | 31 0 |
+----------+--------------------------------+
| token ID | string without null terminator |
+----------+--------------------------------+
@@ -68,19 +132,16 @@ each entry is in the following form:
example implementation:
```nasm
tokens:
.by_name1:
db "+"
dw 0x0062
db "-"
dw 0x0063
.by_name2:
db "r8"
tokens
.registers:
dd "r8"
dw 0x0008
.by_name3: ; this is required for futureproofness; the caller can use this to
; find the size of tokens.by_name2
; find the size of registers.by_name2
```
note that token names longer than 4 bytes do not fit in an entry's 4-byte string field, so such tokens are problematic :/
#### `tokens.by_id`
contains some tokens with their metadata. Some tokens have embedded information (`0x10XX` for instance). Those will not have entries in this table, being handled instead inside the assemble function itself.

View File

@@ -22,6 +22,14 @@ UNRECOGNISED_ID_OPCODE equ 0x90 ; opcode of an unrecognised id (NOP)
TEST_LINE_LENGTH equ 80 ; right border of test suite results
; flags for expected values in tokeniser
; each flag occupies its own bit, so several can be OR-ed together into the
; single `.expecting` state byte and checked individually with `test`
E_COMMENT equ 1 << 0 ; a comment may come next
E_NEWLINE equ 1 << 1 ; a newline may come next
E_WHITESPACE equ 1 << 2 ; a whitespace byte may come next
E_COMMA equ 1 << 3 ; a comma may come next
E_OPERATOR equ 1 << 4 ; an operator may come next
E_OPERAND equ 1 << 5 ; an operand may come next
[bits 64]
[org LOAD_ADDR]
[default abs] ; TODO see if I actually need to do this
@@ -455,256 +463,201 @@ get_reg_bits:
; ------------------------------------------------------------------------------
tokenise:
; NOTE(review): this span looks like two revisions of the routine interleaved
; (`add rsi, rdi` appears twice, and both ecx and eax are described as the
; token counter). Confirm which lines are live before relying on the comments.
add rsi, rdi ; last byte of program
xor ecx, ecx ; number of tokens processed
; rdi -> current byte of program
add rsi, rdi ; rsi -> last byte of program
xor eax, eax ; rax = number of tokens processed
xor edx, edx ; dl = current byte of program
.loop:
cmp rdi, rsi ; if current byte greater than last byte
; NOTE(review): `jg` is a signed comparison; addresses are normally compared
; unsigned (`ja`) - confirm
jg .break ; then break
push rdi
push rsi
push rcx
mov dl, [rdi] ; dl = current byte
; rdi -> current byte
call identify_next_token
; ax = id of token
; dx = length of token
cmp dl, ";" ; if current byte is the start of a comment
je .comment ; then handle the comment
pop rcx
pop rsi
pop rdi
cmp dl, 0x0A ; if current byte is the end of a line
je .newline_mk_flags ; then reset relevant flags
; deal with terminator character (reported as 0 length token)
cmp rdx, 0
je .token_length0
jne .continue0
.token_length0:
mov ax, 0xFE00 ; terminator character
mov al, [rdi] ; byte of terminator
mov edx, 1 ; byte length is 1
.continue0:
add rdi, rdx ; current byte + length of token = next unread byte
mov [TOKEN_TABLE_ADDR + rcx * TOKEN_TABLE_ENTRY_SIZE], ax ; fill next entry
; in token table
; TODO fix undefined behaviour when open brackets and closed brackets aren't
; correctly paired or have too much distance between them
cmp ax, 0x0051 ; check if read token is an open bracket
je .open_bracket ; if so, handle it
jne .continue_open_bracket ; if not, continue
.open_bracket:
; TODO make brackets able to hold more
; only the low byte of the token index is stored, so the index must fit in cl
mov [.data_open_bracket], cl ; record which entry the open bracket is at
.continue_open_bracket:
cmp ax, 0x0052 ; check if read token is a closing bracket
je .close_bracket ; if so, handle it
jne .continue_close_bracket ; if not, continue
.close_bracket:
; rewrite open bracket token entry with a filled out one:
; low byte = distance (in tokens) from the open bracket to this closing
; bracket, high byte = 0x10
push rcx
mov dl, [.data_open_bracket]
sub cl, dl
mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl
mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10
pop rcx
.continue_close_bracket:
inc rcx ; +1 token processed
jmp .loop
.break:
mov rax, rcx ; return the number of tokens processed
ret
.data_open_bracket db 0x00 ; represents the token # of the latest open bracket
; ------------------------------------------------------------------------------
; identify_token
;
; description:
; returns the id of a given token. If there are multiple ways to represent a
; given token, like the open-bracket, it returns the one that doesn't require
; information about the surrounding tokens, because it has no such information.
; In other words, if it isn't in the `tokens` data structure, this function
; doesn't see it. If the first byte of the token points to a terminator
; byte, this function returns it as an unrecognised token.
;
; Each `tokens.by_name_N` table is a packed list of entries made of an N-byte
; name followed by a 2-byte id; `tokens.by_name_<N+1>` marks the end of table N.
; Only exactly `rsi` bytes are read from the token, so a token that sits at the
; very end of a buffer is safe to look up.
;
; parameters:
; rdi -> first byte of token
; rsi = size of token in bytes (sizes other than 1..4 are unrecognised)
;
; returned:
; ax = id of token; the rest of rax is zeroed
;
; clobbers:
; rcx, r10, flags
; ------------------------------------------------------------------------------
identify_token:
cmp rsi, 1 ; if the token has length 1
je .start_length1 ; then search the length 1 table
cmp rsi, 2 ; if the token has length 2
je .start_length2 ; then search the length 2 table
cmp rsi, 3 ; if the token has length 3
je .start_length3 ; then search the length 3 table
cmp rsi, 4 ; if the token has length 4
je .start_length4 ; then search the length 4 table
jmp .unrecognised ; else unrecognised

; length1
.start_length1:
mov rcx, tokens.by_name_1 ; rcx -> current entry in known tokens
.loop_length1:
cmp rcx, tokens.by_name_2 ; addresses are unsigned: past the end of the table?
jae .unrecognised ; if so, unrecognised
mov r10b, [rcx] ; name of current entry
cmp r10b, [rdi] ; if it matches the token
je .found_length1 ; exit loop
add rcx, 3 ; length of name + length of id
jmp .loop_length1
.found_length1:
movzx eax, word [rcx + 1] ; return id of token, rest of rax zeroed
ret

; length2
.start_length2:
mov rcx, tokens.by_name_2 ; rcx -> current entry in known tokens
.loop_length2:
cmp rcx, tokens.by_name_3 ; past the end of the length 2 table?
jae .unrecognised ; if so, unrecognised
mov r10w, [rcx] ; name of current entry
cmp r10w, [rdi] ; if it matches the token
je .found_length2 ; exit loop
add rcx, 4 ; length of name + length of id
jmp .loop_length2
.found_length2:
movzx eax, word [rcx + 2] ; return id of token, rest of rax zeroed
ret

; length3
.start_length3:
mov rcx, tokens.by_name_3 ; rcx -> current entry in known tokens
.loop_length3:
cmp rcx, tokens.by_name_4 ; past the end of the length 3 table?
jae .unrecognised ; if so, unrecognised
; compare as word + byte so exactly 3 bytes are read on both sides
mov r10w, [rcx] ; first two bytes of the name
cmp r10w, [rdi]
jne .next_length3
mov r10b, [rcx + 2] ; third byte of the name
cmp r10b, [rdi + 2]
je .found_length3 ; all three bytes match
.next_length3:
add rcx, 5 ; length of name + length of id
jmp .loop_length3
.found_length3:
movzx eax, word [rcx + 3] ; return id of token, rest of rax zeroed
ret

; length4
.start_length4:
mov rcx, tokens.by_name_4 ; rcx -> current entry in known tokens
.loop_length4:
cmp rcx, tokens.by_name_5 ; past the end of the length 4 table?
jae .unrecognised ; if so, unrecognised
mov r10d, [rcx] ; name of current entry
cmp r10d, [rdi] ; if it matches the token
je .found_length4 ; exit loop
add rcx, 6 ; length of name + length of id
jmp .loop_length4
.found_length4:
movzx eax, word [rcx + 4] ; return id of token, rest of rax zeroed
ret

.unrecognised:
mov eax, UNRECOGNISED_TOKEN_ID ; 32-bit write also zeroes the rest of rax
ret
; ------------------------------------------------------------------------------
; identify_next_token
; description:
; like identify_token, except it automatically finds the length. If the first
; byte of the token points to a terminator byte, it returns a length of 0.
;
; NOTE(review): the body below looks like two revisions interleaved (rsi/rdi
; are loaded twice before `call elemb`, and there are instructions after an
; unconditional `jmp`). Confirm which lines are live.
;
; parameters:
; rdi -> first byte of token
;
; returned:
; ax = id of token; the rest of rax is zeroed
; dx = length of token in bytes; the rest of rdx is zeroed
; ------------------------------------------------------------------------------
identify_next_token:
push rdi
mov rsi, rdi ; rsi is the current byte
xor rdi, rdi ; rdi is the length
.loop:
xor edx, edx
mov dl, [rsi]
cmp dl, "," ; if current byte is a comma
je .comma ; then handle the comma
; save the registers that are reused as arguments for / results of elemb
push rsi
push rdi
push rax
push rdx
mov rdi, 8 ; length of terminator list
mov rsi, token_terminator_8 ; start of terminator list
mov rsi, whitespace_2 ; rsi -> list of whitespace (ignored) bytes
mov rdi, 2 ; rdi = size of list in bytes
; dl = current byte
call elemb
; al = 0 if not whitespace, 1 if whitespace
cmp al, 1 ; check if current byte is whitespace
; `pop` does not modify flags, so the `je` below still sees the result of the
; `cmp al, 1` above even though rax is restored in between
pop rdx
pop rax
pop rdi
pop rsi
je .skip_byte_whitespace
cmp rax, 1 ; check if the next character is a token terminator
je .break ; if so, break
test byte [.expecting], E_OPERATOR ; check if an operator is expected
jnz .operator ; if so, handle it
jmp .operand ; otherwise, handle as an operand
; NOTE(review): the two increments below follow an unconditional jmp and have
; no label, so they are unreachable as written
inc rdi ; next character
inc rsi ; next byte of token
; .comment: prints `.found` (via print.debug) followed by `.msg_comment`,
; checks that a comment is allowed here, then advances rdi to the newline that
; ends the comment. The newline itself is not consumed.
.comment:
push rsi ; rsi is reused as the print argument
mov rsi, .found
call print.debug
mov rsi, .msg_comment
call print
pop rsi
test byte [.expecting], E_COMMENT ; make sure a comment is expected
jz .unexpected_comment ; if not, error
.comment_loop:
; TODO range check rdi
mov dl, [rdi] ; dl = current byte
cmp dl, 0x0A ; if current byte is a newline
je .comment_break ; then break
inc rdi ; point to next unread byte
jmp .comment_loop
.comment_break:
jmp .loop
; .skip_byte_whitespace: prints `.found` (via print.debug) followed by
; `.msg_whitespace`, checks that whitespace is allowed here, then skips one
; byte. `.expecting` is left unchanged.
.skip_byte_whitespace:
push rsi ; rsi is reused as the print argument
mov rsi, .found
call print.debug
mov rsi, .msg_whitespace
call print
pop rsi
test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected
jz .unexpected_whitespace ; if not, error
inc rdi ; step over the whitespace byte
jmp .loop ; else, loop
; .comma: prints `.found` (via print.debug) followed by `.msg_comma`, checks
; that a comma is allowed here, consumes it, and then allows only whitespace or
; an operand to follow.
.comma: ; found comma
push rsi ; rsi is reused as the print argument
mov rsi, .found
call print.debug
mov rsi, .msg_comma
call print
pop rsi
test byte [.expecting], E_COMMA ; make sure a comma was expected
jz .unexpected_comma ; if not, error
inc rdi ; step over the comma
; `.expecting` is a single byte (db); the size must be spelled out because
; neither operand of a memory/immediate mov implies one
mov byte [.expecting], E_WHITESPACE | E_OPERAND ; else, make operand expected
jmp .loop ; and loop
; .newline_mk_flags: prints `.found` (via print.debug) followed by
; `.msg_newline`, checks that a newline is allowed here, resets `.expecting`
; to the start-of-line state (same value `.expecting` is initialised with),
; and consumes the newline.
.newline_mk_flags:
push rsi ; rsi is reused as the print argument
mov rsi, .found
call print.debug
mov rsi, .msg_newline
call print
pop rsi
test byte [.expecting], E_NEWLINE ; make sure a newline was expected
jz .unexpected_newline ; if not, error
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
inc rdi ; step over the newline
jmp .loop
; .operator: prints `.found` (via print.debug) followed by `.msg_operator`,
; then advances rdi to the first space, newline or ';' (which is left
; unconsumed) and makes an operand acceptable next.
; NOTE(review): unlike .operand_loop this scan does not stop at a 0x00 byte,
; so an operator that ends the input without a trailing newline would be
; scanned past - confirm whether that is intended.
.operator:
push rsi ; rsi is reused as the print argument
mov rsi, .found
call print.debug
mov rsi, .msg_operator
call print
pop rsi
.operator_loop:
mov dl, [rdi] ; next byte
cmp dl, " " ; a space ends the operator
je .operator_break
cmp dl, 0x0A ; so does a newline
je .operator_break
cmp dl, ";" ; and so does the start of a comment
je .operator_break
inc rdi ; inc byte counter
jmp .operator_loop ; and loop
.operator_break:
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
jmp .loop
; .operand: prints `.found` (via print.debug) followed by `.msg_operand`,
; checks that an operand is allowed here, then advances rdi to the first comma,
; newline or 0x00 byte (which is left unconsumed) and makes a comma acceptable
; next.
; NOTE(review): spaces do not end the scan, so whitespace inside an operand
; (e.g. "[ rax ]") is skipped over as part of the operand.
.operand:
push rsi ; rsi is reused as the print argument
mov rsi, .found
call print.debug
mov rsi, .msg_operand
call print
pop rsi
test byte [.expecting], E_OPERAND ; make sure an operand was expected
jz .unexpected_operand ; if not, error
.operand_loop:
mov dl, [rdi] ; dl = current byte
cmp dl, "," ; a comma ends the operand
je .operand_break
cmp dl, 0x0A ; so does a newline
je .operand_break
cmp dl, 0x00 ; and so does a 0x00 byte
je .operand_break
inc rdi ; point to next unread byte
jmp .operand_loop
.operand_break:
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
jmp .loop
.break:
mov rsi, rdi ; length of token
ret
; NOTE(review): everything from here to the next `ret` follows a `ret` with no
; label in between, so it is unreachable as written - this looks like two
; revisions interleaved; confirm which lines are live
pop rdi
; state
push rsi
call identify_token
pop rsi
mov rdx, rsi ; length
ret
; bitmask of E_* flags naming what may legally come next; it is rewritten at
; runtime by the handlers above. The initial value is the start-of-line state.
.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
; error exits: each entry point selects the name of the offending item and
; hands over to the shared reporter, which prints `.err_unexpected` (via
; print.error) followed by that name and then halts. None of these return.
.unexpected_whitespace:
mov rsi, .msg_whitespace
jmp .unexpected_report
.unexpected_comment:
mov rsi, .msg_comment
jmp .unexpected_report
.unexpected_newline:
mov rsi, .msg_newline
jmp .unexpected_report
.unexpected_comma:
mov rsi, .msg_comma
jmp .unexpected_report
.unexpected_operand:
mov rsi, .msg_operand
; fall through into the reporter
.unexpected_report:
; rsi -> name of the unexpected item
push rsi ; keep the item name while rsi carries the prefix
mov rsi, .err_unexpected
call print.error
pop rsi ; rsi -> name of the unexpected item again
call print
jmp halt
; NUL-terminated strings used by the handlers above.
; `.err_unexpected` and `.found` are prefixes without a newline; each `.msg_*`
; names the item and ends the line, so a prefix followed by a `.msg_*` forms
; one complete line of output.
.err_unexpected db "unexpected ", 0x00
.found db "found ", 0x00
.msg_whitespace db "whitespace.", 0x0A, 0x00
.msg_comment db "comment.", 0x0A, 0x00
.msg_newline db "newline.", 0x0A, 0x00
.msg_comma db "comma.", 0x0A, 0x00
.msg_operator db "operator.", 0x0A, 0x00
.msg_operand db "operand.", 0x0A, 0x00
; ------------------------------------------------------------------------------
; utilities
@@ -789,7 +742,7 @@ halt:
; elemb
;
; description:
; checks if given byte is element of the specified list
; checks if given byte is element of the specified list.
;
; parameters:
; rdi = size of list
@@ -872,216 +825,9 @@ clear_output_arena:
; data
; ------------------------------------------------------------------------------
tokens:
.by_name_1:
db "["
dw 0x0051
db "]"
dw 0x0052
db "+"
dw 0x0062
db "-"
dw 0x0063
db "*"
dw 0x0064
db "/"
dw 0x0065
.by_name_2:
db "r8"
dw 0x0008
db "r9"
dw 0x0009
db "ax"
dw 0x0020
db "bx"
dw 0x0021
db "cx"
dw 0x0022
db "dx"
dw 0x0023
db "si"
dw 0x0024
db "di"
dw 0x0025
db "sp"
dw 0x0026
db "bp"
dw 0x0027
db "al"
dw 0x0030
db "bl"
dw 0x0031
db "cl"
dw 0x0032
db "dl"
dw 0x0033
db "ah"
dw 0x0040
db "bh"
dw 0x0041
db "ch"
dw 0x0042
db "dh"
dw 0x0043
db "cs"
dw 0x0044
db "ds"
dw 0x0045
db "es"
dw 0x0046
db "fs"
dw 0x0047
db "gs"
dw 0x0048
db "ss"
dw 0x0049
db "je"
dw 0x005C
db "jg"
dw 0x005F
db "jl"
dw 0x0061
.by_name_3:
db "rax"
dw 0x0000
db "rbx"
dw 0x0001
db "rcx"
dw 0x0002
db "rdx"
dw 0x0003
db "rsi"
dw 0x0004
db "rdi"
dw 0x0005
db "rsp"
dw 0x0006
db "rbp"
dw 0x0007
db "r10"
dw 0x000A
db "r11"
dw 0x000B
db "r12"
dw 0x000C
db "r13"
dw 0x000D
db "r14"
dw 0x000E
db "r15"
dw 0x000F
db "eax"
dw 0x0010
db "ebx"
dw 0x0011
db "ecx"
dw 0x0012
db "edx"
dw 0x0013
db "esi"
dw 0x0014
db "edi"
dw 0x0015
db "esp"
dw 0x0016
db "ebp"
dw 0x0017
db "r8d"
dw 0x0018
db "r9d"
dw 0x0019
db "r8w"
dw 0x0028
db "r9w"
dw 0x0029
db "sil"
dw 0x0034
db "dil"
dw 0x0035
db "spl"
dw 0x0036
db "bpl"
dw 0x0037
db "r8b"
dw 0x0038
db "r9b"
dw 0x0039
db "cr0"
dw 0x004A
db "cr2"
dw 0x004B
db "cr3"
dw 0x004C
db "cr4"
dw 0x004D
db "cr8"
dw 0x004E
db "hlt"
dw 0x004F
db "xor"
dw 0x0053
db "inc"
dw 0x0054
db "dec"
dw 0x0055
db "mov"
dw 0x0056
db "add"
dw 0x0057
db "sub"
dw 0x0058
db "ret"
dw 0x005A
db "cmp"
dw 0x005B
db "jne"
dw 0x005D
db "jge"
dw 0x005E
db "jle"
dw 0x0060
.by_name_4:
db "r10d"
dw 0x001A
db "r11d"
dw 0x001B
db "r12d"
dw 0x001C
db "r13d"
dw 0x001D
db "r14d"
dw 0x001E
db "r15d"
dw 0x001F
db "r10w"
dw 0x002A
db "r11w"
dw 0x002B
db "r12w"
dw 0x002C
db "r13w"
dw 0x002D
db "r14w"
dw 0x002E
db "r15w"
dw 0x002F
db "r10b"
dw 0x003A
db "r11b"
dw 0x003B
db "r12b"
dw 0x003C
db "r13b"
dw 0x003D
db "r14b"
dw 0x003E
db "r15b"
dw 0x003F
db "int3"
dw 0x0050
db "call"
dw 0x0059
.by_name_5:
.by_id:
dw 0x0010 ; eax
db 0x02 ; type: register
@@ -1114,6 +860,202 @@ tokens:
db 0x01 ; type: operator
db 0x00 ; # operands
.by_id_end:
; tokens.operators: operator names paired with their token IDs.
; Each entry is 6 bytes: a 4-byte name field (`dd` with a string; NASM pads
; names shorter than 4 bytes with zero bytes) followed by the 2-byte token ID.
; `.operators_end` marks the end of the list.
.operators:
; two-character names
dd "je"
dw 0x005C
dd "jg"
dw 0x005F
dd "jl"
dw 0x0061
; three-character names
dd "hlt"
dw 0x004F
dd "xor"
dw 0x0053
dd "inc"
dw 0x0054
dd "dec"
dw 0x0055
dd "mov"
dw 0x0056
dd "add"
dw 0x0057
dd "sub"
dw 0x0058
dd "ret"
dw 0x005A
dd "cmp"
dw 0x005B
dd "jne"
dw 0x005D
dd "jge"
dw 0x005E
dd "jle"
dw 0x0060
; four-character names
dd "int3"
dw 0x0050
dd "call"
dw 0x0059
.operators_end:
; tokens.registers: register names paired with their token IDs.
; Each entry is 6 bytes: a 4-byte name field (`dd` with a string; NASM pads
; names shorter than 4 bytes with zero bytes) followed by the 2-byte token ID.
; `.registers_end` marks the end of the list.
.registers:
; two-character names
dd "r8"
dw 0x0008
dd "r9"
dw 0x0009
dd "ax"
dw 0x0020
dd "bx"
dw 0x0021
dd "cx"
dw 0x0022
dd "dx"
dw 0x0023
dd "si"
dw 0x0024
dd "di"
dw 0x0025
dd "sp"
dw 0x0026
dd "bp"
dw 0x0027
dd "al"
dw 0x0030
dd "bl"
dw 0x0031
dd "cl"
dw 0x0032
dd "dl"
dw 0x0033
dd "ah"
dw 0x0040
dd "bh"
dw 0x0041
dd "ch"
dw 0x0042
dd "dh"
dw 0x0043
dd "cs"
dw 0x0044
dd "ds"
dw 0x0045
dd "es"
dw 0x0046
dd "fs"
dw 0x0047
dd "gs"
dw 0x0048
dd "ss"
dw 0x0049
; three-character names
dd "rax"
dw 0x0000
dd "rbx"
dw 0x0001
dd "rcx"
dw 0x0002
dd "rdx"
dw 0x0003
dd "rsi"
dw 0x0004
dd "rdi"
dw 0x0005
dd "rsp"
dw 0x0006
dd "rbp"
dw 0x0007
dd "r10"
dw 0x000A
dd "r11"
dw 0x000B
dd "r12"
dw 0x000C
dd "r13"
dw 0x000D
dd "r14"
dw 0x000E
dd "r15"
dw 0x000F
dd "eax"
dw 0x0010
dd "ebx"
dw 0x0011
dd "ecx"
dw 0x0012
dd "edx"
dw 0x0013
dd "esi"
dw 0x0014
dd "edi"
dw 0x0015
dd "esp"
dw 0x0016
dd "ebp"
dw 0x0017
dd "r8d"
dw 0x0018
dd "r9d"
dw 0x0019
dd "r8w"
dw 0x0028
dd "r9w"
dw 0x0029
dd "sil"
dw 0x0034
dd "dil"
dw 0x0035
dd "spl"
dw 0x0036
dd "bpl"
dw 0x0037
dd "r8b"
dw 0x0038
dd "r9b"
dw 0x0039
dd "cr0"
dw 0x004A
dd "cr2"
dw 0x004B
dd "cr3"
dw 0x004C
dd "cr4"
dw 0x004D
dd "cr8"
dw 0x004E
; four-character names
dd "r10d"
dw 0x001A
dd "r11d"
dw 0x001B
dd "r12d"
dw 0x001C
dd "r13d"
dw 0x001D
dd "r14d"
dw 0x001E
dd "r15d"
dw 0x001F
dd "r10w"
dw 0x002A
dd "r11w"
dw 0x002B
dd "r12w"
dw 0x002C
dd "r13w"
dw 0x002D
dd "r14w"
dw 0x002E
dd "r15w"
dw 0x002F
dd "r10b"
dw 0x003A
dd "r11b"
dw 0x003B
dd "r12b"
dw 0x003C
dd "r13b"
dw 0x003D
dd "r14b"
dw 0x003E
dd "r15b"
dw 0x003F
.registers_end:
opcodes:
.by_id:
@@ -1139,11 +1081,14 @@ msg_halt db "halted.", 0x0A, 0x00
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
whitespace_2 db " ", 0x0D
; test program
; sample source text fed to the tokeniser: one statement per line, lines
; separated by 0x0A, covering an inline comment, a whole-line comment and a
; bracketed operand.
; NOTE(review): the two trailing "hlt" lines and the two `.size` definitions
; look like an old and a new revision shown together - confirm which is live.
program:
db "xor eax, eax", 0x0A
db "inc rax", 0x0A
db "inc rax ; inline comment", 0x0A
db "; one line comment", 0x0A
db "mov [ rax ], rdx", 0x0A
db "hlt", 0x0A
db "hlt"
db 0x00 ; NUL terminator, present only so the program can be printed as a string
; `.size` is a single byte (db), so it can describe at most 255 bytes
.size db $ - program - 1
.size db $ - program

View File

@@ -16,12 +16,6 @@ run_tests:
call clear_test_arena
call test_elemb
call clear_test_arena
call test_identify_token
call clear_test_arena
call test_identify_next_token
call clear_test_arena
call test_get_tte_type
@@ -101,202 +95,6 @@ test_elemb:
ret
.msg db "test_elemb...", 0x00
; ------------------------------------------------------------------------------
; test_identify_token
;
; description:
; tests identify_token described functionality
;
; Each case writes a candidate token into the test arena, calls identify_token
; with rdi -> token and rsi = token length, and compares ax with the expected
; id. The first mismatch jumps to .fail; if every case matches, .pass runs.
; Cases for names that are not known tokens expect 0xFFFF (presumably the value
; of UNRECOGNISED_TOKEN_ID - confirm).
; ------------------------------------------------------------------------------
test_identify_token:
mov rsi, .msg
call print.test
; length1 token that exists
mov byte [TEST_ARENA_ADDR], "*"
mov rdi, TEST_ARENA_ADDR
mov rsi, 1
call identify_token
cmp ax, 0x0064
jne .fail
; length1 token that doesn't exist
mov byte [TEST_ARENA_ADDR], " "
mov rdi, TEST_ARENA_ADDR
mov rsi, 1
call identify_token
cmp ax, 0xFFFF
jne .fail
; length2 token that exists
mov word [TEST_ARENA_ADDR], "sp"
mov rdi, TEST_ARENA_ADDR
mov rsi, 2
call identify_token
cmp ax, 0x0026
jne .fail
; length2 token that doesn't exist
mov word [TEST_ARENA_ADDR], "QQ"
mov rdi, TEST_ARENA_ADDR
mov rsi, 2
call identify_token
cmp ax, 0xFFFF
jne .fail
; length3 token that exists (first entry of the length 3 table, id 0)
mov dword [TEST_ARENA_ADDR], "rax"
mov rdi, TEST_ARENA_ADDR
mov rsi, 3
call identify_token
cmp ax, 0x0000
jne .fail
; length3 token that exists (an entry further into the table)
mov dword [TEST_ARENA_ADDR], "cr0"
mov rdi, TEST_ARENA_ADDR
mov rsi, 3
call identify_token
cmp ax, 0x004A
jne .fail
; length3 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r16"
mov rdi, TEST_ARENA_ADDR
mov rsi, 3
call identify_token
cmp ax, 0xFFFF
jne .fail
; length4 token that exists (first entry of the length 4 table)
mov dword [TEST_ARENA_ADDR], "r10d"
mov rdi, TEST_ARENA_ADDR
mov rsi, 4
call identify_token
cmp ax, 0x001A
jne .fail
; length4 token that exists (an entry further into the table)
mov dword [TEST_ARENA_ADDR], "r15b"
mov rdi, TEST_ARENA_ADDR
mov rsi, 4
call identify_token
cmp ax, 0x003F
jne .fail
; length4 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r15q"
mov rdi, TEST_ARENA_ADDR
mov rsi, 4
call identify_token
cmp ax, 0xFFFF
jne .fail
.pass:
mov rsi, msg_pass
call print
ret
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_identify_token...", 0x00
; ------------------------------------------------------------------------------
; test_identify_next_token
;
; description:
; tests identify_next_token described functionality
;
; Each case writes a candidate token followed by a space into the test arena,
; calls identify_next_token with rdi -> token (no length is passed), and
; compares ax with the expected id. Only the returned id is checked; the
; returned length in dx is not. The first mismatch jumps to .fail.
; ------------------------------------------------------------------------------
test_identify_next_token:
mov rsi, .msg
call print.test
; length1 token that exists
mov word [TEST_ARENA_ADDR], "* "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x0064
jne .fail
; length1 token that doesn't exist
mov word [TEST_ARENA_ADDR], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
; length2 token that exists
mov dword [TEST_ARENA_ADDR], "sp "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x0026
jne .fail
; length2 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "QQ "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
; length3 token that exists (id 0)
mov dword [TEST_ARENA_ADDR], "rax "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x0000
jne .fail
; length3 token that exists (non-zero id)
mov dword [TEST_ARENA_ADDR], "cr0 "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x004A
jne .fail
; length3 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r16 "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
; length4 token that exists
; (the terminating space does not fit in the dword, so it is stored separately)
mov dword [TEST_ARENA_ADDR], "r10d"
mov byte [TEST_ARENA_ADDR + 4], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x001A
jne .fail
; length4 token that exists
mov dword [TEST_ARENA_ADDR], "r15b"
mov byte [TEST_ARENA_ADDR + 4], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x003F
jne .fail
; length4 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r15q"
mov byte [TEST_ARENA_ADDR + 4], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
.pass:
mov rsi, msg_pass
call print
ret
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_identify_next_token...", 0x00
; ------------------------------------------------------------------------------
; test_get_tte_type
;