some major architecture changes
This commit is contained in:
@@ -12,6 +12,70 @@ I want to compile Bootler and Twasm with the Twasm assembler
|
||||
- [opcodes,ModR/M,SIB](http://ref.x86asm.net/coder64.html) (no secure site available)
|
||||
- [calling conventions](https://wiki.osdev.org/Calling_Conventions); I try to use System V
|
||||
|
||||
### tokeniser
|
||||
|
||||
whitespace is ignored for the sake of readability; it can go between pretty much anything
|
||||
|
||||
```
|
||||
------------------------
|
||||
tokeniser
|
||||
------------------------
|
||||
byte(s) -> next byte(s)
|
||||
------------------------
|
||||
Newline -> Newline
|
||||
-> Komment
|
||||
-> Operator
|
||||
-> Directive
|
||||
|
||||
Komment -> Newline
|
||||
|
||||
Operator -> Newline
|
||||
-> Komment
|
||||
-> Operand
|
||||
|
||||
Operand -> Newline
|
||||
-> Komment
|
||||
-> Comma
|
||||
|
||||
Comma -> Operand
|
||||
|
||||
Directive -> Newline
|
||||
-> Komment
|
||||
-> Operator
|
||||
------------------------
|
||||
```
|
||||
|
||||
not yet implemented:
|
||||
|
||||
```
|
||||
------------------------
|
||||
operand parser
|
||||
------------------------
|
||||
byte(s) -> next byte(s)
|
||||
------------------------
|
||||
START -> '['
|
||||
-> Register
|
||||
-> Constant
|
||||
|
||||
'[' -> Register
|
||||
-> Constant
|
||||
|
||||
']' -> END
|
||||
|
||||
Register -> IF #[, ']'
|
||||
-> Operator
|
||||
|
||||
Constant -> IF #[, ']'
|
||||
-> Operator
|
||||
|
||||
Operator -> IF NOT #R, Register
|
||||
-> Constant
|
||||
------------------------
|
||||
#R = whether a register has been found
|
||||
#[ = whether a '[' has been found
|
||||
------------------------
|
||||
```
|
||||
|
||||
### memory map
|
||||
|
||||
```
|
||||
@@ -50,15 +114,15 @@ each token gets loaded into the token table with the following form:
|
||||
|
||||
### internal data structures
|
||||
|
||||
#### `tokens.by_nameX`
|
||||
#### `tokens.[operators|registers]`
|
||||
|
||||
contains all tokens of that length followed by their ID. For some non-empty `tokens.by_nameX`, it is true that `tokens.by_name<X+1> - tokens.by_nameX` is the size in bytes of `tokens.by_nameX`.
|
||||
contains tokens by their type. Intended to be searched by token name to get the token's ID.
|
||||
|
||||
each entry is in the following form:
|
||||
|
||||
```
|
||||
+----------+--------------------------------+
|
||||
|[2 bytes] | 8 * token_length - 1 0 |
|
||||
| 47 32 | 31 0 |
|
||||
+----------+--------------------------------+
|
||||
| token ID | string without null terminator |
|
||||
+----------+--------------------------------+
|
||||
@@ -68,19 +132,16 @@ each entry is in the following form:
|
||||
example implementation:
|
||||
|
||||
```nasm
|
||||
tokens:
|
||||
.by_name1:
|
||||
db "+"
|
||||
dw 0x0062
|
||||
db "-"
|
||||
dw 0x0063
|
||||
.by_name2:
|
||||
db "r8"
|
||||
tokens
|
||||
.registers:
|
||||
dd "r8"
|
||||
dw 0x0008
|
||||
.by_name3: ; this is required for futureproofness; the caller can use this to
|
||||
; find the size of tokens.by_name2
|
||||
; find the size of registers.by_name2
|
||||
```
|
||||
|
||||
note that tokens longer than 4 bytes are problematic :/
|
||||
|
||||
#### `tokens.by_id`
|
||||
|
||||
contains some tokens with their metadata. Some tokens have embedded information (`0x10XX` for instance). Those will not have entries in this table, being handled instead inside the assemble function itself.
|
||||
|
||||
@@ -22,6 +22,14 @@ UNRECOGNISED_ID_OPCODE equ 0x90 ; opcode of an unrecognised id (NOP)
|
||||
|
||||
TEST_LINE_LENGTH equ 80 ; right border of test suite results
|
||||
|
||||
; flags for expected values in tokeniser
|
||||
E_COMMENT equ 1 << 0
|
||||
E_NEWLINE equ 1 << 1
|
||||
E_WHITESPACE equ 1 << 2
|
||||
E_COMMA equ 1 << 3
|
||||
E_OPERATOR equ 1 << 4
|
||||
E_OPERAND equ 1 << 5
|
||||
|
||||
[bits 64]
|
||||
[org LOAD_ADDR]
|
||||
[default abs] ; TODO see if I actually need to do this
|
||||
@@ -455,256 +463,201 @@ get_reg_bits:
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
tokenise:
|
||||
add rsi, rdi ; last byte of program
|
||||
xor ecx, ecx ; number of tokens processed
|
||||
; rdi -> current byte of program
|
||||
add rsi, rdi ; rsi -> last byte of program
|
||||
xor eax, eax ; rax = number of tokens processed
|
||||
xor edx, edx ; dl = current byte of program
|
||||
|
||||
.loop:
|
||||
cmp rdi, rsi ; if current byte greater than last byte
|
||||
jg .break ; then break
|
||||
|
||||
push rdi
|
||||
push rsi
|
||||
push rcx
|
||||
mov dl, [rdi] ; dl = current byte
|
||||
|
||||
; rdi -> current byte
|
||||
call identify_next_token
|
||||
; ax = id of token
|
||||
; dx = length of token
|
||||
cmp dl, ";" ; if current byte is the start of a comment
|
||||
je .comment ; then handle the comment
|
||||
|
||||
pop rcx
|
||||
pop rsi
|
||||
pop rdi
|
||||
cmp dl, 0x0A ; if current byte is the end of a line
|
||||
je .newline_mk_flags ; then reset relevant flags
|
||||
|
||||
; deal with terminator character (reported as 0 length token)
|
||||
cmp rdx, 0
|
||||
je .token_length0
|
||||
jne .continue0
|
||||
|
||||
.token_length0:
|
||||
mov ax, 0xFE00 ; terminator character
|
||||
mov al, [rdi] ; byte of terminator
|
||||
mov edx, 1 ; byte length is 1
|
||||
|
||||
.continue0:
|
||||
add rdi, rdx ; current byte + length of token = next unread byte
|
||||
|
||||
mov [TOKEN_TABLE_ADDR + rcx * TOKEN_TABLE_ENTRY_SIZE], ax ; fill next entry
|
||||
; in token table
|
||||
|
||||
; TODO fix undefined behaviour when open brackets and closed brackets aren't
|
||||
; correctly paired or have too much distance between them
|
||||
cmp ax, 0x0051 ; check if read token is an open bracket
|
||||
je .open_bracket ; if so, handle it
|
||||
jne .continue_open_bracket ; if not, continue
|
||||
|
||||
.open_bracket:
|
||||
; TODO make brackets able to hold more
|
||||
mov [.data_open_bracket], cl ; record which entry the open bracket is at
|
||||
|
||||
.continue_open_bracket:
|
||||
cmp ax, 0x0052 ; check if read token is a closing bracket
|
||||
je .close_bracket ; if so, handle it
|
||||
jne .continue_close_bracket ; if not, continue
|
||||
|
||||
.close_bracket:
|
||||
; rewrite open bracket token entry with a filled out one
|
||||
push rcx
|
||||
|
||||
mov dl, [.data_open_bracket]
|
||||
sub cl, dl
|
||||
mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl
|
||||
mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10
|
||||
|
||||
pop rcx
|
||||
|
||||
.continue_close_bracket:
|
||||
inc rcx ; +1 token processed
|
||||
jmp .loop
|
||||
.break:
|
||||
mov rax, rcx
|
||||
ret
|
||||
|
||||
.data_open_bracket db 0x00 ; represents the token # of the latest open bracket
|
||||
|
||||
; ------------------------------------------------------------------------------
; identify_token
;
; description:
; returns the id of a given token. If there are multiple ways to represent a
; given token, like the open-bracket, it returns the one that doesn't require
; information about the surrounding tokens, because it has no such information.
; In other words, if it isn't in the `tokens` data structure, this function
; doesn't see it. If the first byte of the token points to a terminator
; byte, this function returns it as an unrecognised token. Tokens whose size
; is not 1, 2, 3 or 4 bytes are always unrecognised.
;
; parameters:
; rdi -> first byte of token
; rsi = size of token in bytes
;
; returned:
; ax = id of token; the rest of rax is zeroed
;
; clobbers:
; rcx, r10, flags
; ------------------------------------------------------------------------------

identify_token:
    ; dispatch on the token size; each size has its own name table
    cmp rsi, 1
    je .start_length1
    cmp rsi, 2
    je .start_length2
    cmp rsi, 3
    je .start_length3
    cmp rsi, 4
    je .start_length4
    jmp .unrecognised               ; no table for any other size

; length1: entries are 1 name byte + 2 id bytes
.start_length1:
    mov rcx, tokens.by_name_1       ; rcx -> current entry in the length1 table

.loop_length1:
    cmp rcx, tokens.by_name_2       ; next table's label marks the end of this one
    jae .unrecognised               ; addresses are unsigned, so use jae not jge

    mov r10b, [rcx]                 ; r10b = known token
    cmp r10b, [rdi]                 ; if it matches the given token
    je .found_length1               ; then stop searching

    add rcx, 3                      ; step over name (1) + id (2)
    jmp .loop_length1

.found_length1:
    movzx eax, word [rcx + 1]       ; ax = id, rest of rax zeroed
    ret

; length2: entries are 2 name bytes + 2 id bytes
.start_length2:
    mov rcx, tokens.by_name_2       ; rcx -> current entry in the length2 table

.loop_length2:
    cmp rcx, tokens.by_name_3       ; end of the length2 table?
    jae .unrecognised

    mov r10w, [rcx]                 ; r10w = known token
    cmp r10w, [rdi]                 ; if it matches the given token
    je .found_length2               ; then stop searching

    add rcx, 4                      ; step over name (2) + id (2)
    jmp .loop_length2

.found_length2:
    movzx eax, word [rcx + 2]       ; ax = id, rest of rax zeroed
    ret

; length3: entries are 3 name bytes + 2 id bytes
.start_length3:
    mov rcx, tokens.by_name_3       ; rcx -> current entry in the length3 table

.loop_length3:
    cmp rcx, tokens.by_name_4       ; end of the length3 table?
    jae .unrecognised

    ; compare as word + byte so that exactly 3 bytes of the caller's token are
    ; read; a dword load would touch one byte past the end of the token
    mov r10w, [rcx]                 ; first two bytes of known token
    cmp r10w, [rdi]
    jne .next_length3
    mov r10b, [rcx + 2]             ; third byte of known token
    cmp r10b, [rdi + 2]
    je .found_length3

.next_length3:
    add rcx, 5                      ; step over name (3) + id (2)
    jmp .loop_length3

.found_length3:
    movzx eax, word [rcx + 3]       ; ax = id, rest of rax zeroed
    ret

; length4: entries are 4 name bytes + 2 id bytes
.start_length4:
    mov rcx, tokens.by_name_4       ; rcx -> current entry in the length4 table

.loop_length4:
    cmp rcx, tokens.by_name_5       ; end of the length4 table?
    jae .unrecognised

    mov r10d, [rcx]                 ; r10d = known token
    cmp r10d, [rdi]                 ; if it matches the given token
    je .found_length4               ; then stop searching

    add rcx, 6                      ; step over name (4) + id (2)
    jmp .loop_length4

.found_length4:
    movzx eax, word [rcx + 4]       ; ax = id, rest of rax zeroed
    ret

.unrecognised:
    xor eax, eax                    ; make sure rest of rax is zeroed
    mov ax, UNRECOGNISED_TOKEN_ID
    ret
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
; identify_next_token
|
||||
; description:
|
||||
; like identify_token, except it automatically finds the length. If the first
|
||||
; byte of the token points to a terminator byte, it returns a length of 0.
|
||||
;
|
||||
; parameters:
|
||||
; rdi -> first byte of token
|
||||
;
|
||||
; returned:
|
||||
; ax = id of token; the rest of rax is zeroed
|
||||
; dx = length of token in bytes; the rest of rdx is zeroed
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
identify_next_token:
|
||||
push rdi
|
||||
|
||||
mov rsi, rdi ; rsi is the current byte
|
||||
xor rdi, rdi ; rdi is the length
|
||||
.loop:
|
||||
xor edx, edx
|
||||
mov dl, [rsi]
|
||||
cmp dl, "," ; if current byte is a comma
|
||||
je .comma ; then handle the comma
|
||||
|
||||
push rsi
|
||||
push rdi
|
||||
push rax
|
||||
push rdx
|
||||
|
||||
mov rdi, 8 ; length of terminator list
|
||||
mov rsi, token_terminator_8 ; start of terminator list
|
||||
mov rsi, whitespace_2 ; rsi -> list of whitespace (ignored) bytes
|
||||
mov rdi, 2 ; rdi = size of list in bytes
|
||||
; dl = current byte
|
||||
call elemb
|
||||
|
||||
; al = 0 if not whitespace, 1 if whitespace
|
||||
cmp al, 1 ; check if current byte is whitespace
|
||||
pop rdx
|
||||
pop rax
|
||||
pop rdi
|
||||
pop rsi
|
||||
je .skip_byte_whitespace
|
||||
|
||||
cmp rax, 1 ; check if the next character is a token terminator
|
||||
je .break ; if so, break
|
||||
test byte [.expecting], E_OPERATOR ; check if an operator is expected
|
||||
jnz .operator ; if so, handle it
|
||||
jmp .operand ; otherwise, handle as an operand
|
||||
|
||||
inc rdi ; next character
|
||||
inc rsi ; next byte of token
|
||||
.comment:
|
||||
push rsi
|
||||
mov rsi, .found
|
||||
call print.debug
|
||||
mov rsi, .msg_comment
|
||||
call print
|
||||
pop rsi
|
||||
test byte [.expecting], E_COMMENT ; make sure a comment is expected
|
||||
jz .unexpected_comment ; if not, error
|
||||
.comment_loop:
|
||||
; TODO range check rdi
|
||||
mov dl, [rdi] ; dl = current byte
|
||||
|
||||
cmp dl, 0x0A ; if current byte is a newline
|
||||
je .comment_break ; then break
|
||||
|
||||
inc rdi ; point to next unread byte
|
||||
jmp .comment_loop
|
||||
.comment_break:
|
||||
jmp .loop
|
||||
|
||||
.skip_byte_whitespace:
|
||||
push rsi
|
||||
mov rsi, .found
|
||||
call print.debug
|
||||
mov rsi, .msg_whitespace
|
||||
call print
|
||||
pop rsi
|
||||
|
||||
test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected
|
||||
jz .unexpected_whitespace ; if not, error
|
||||
inc rdi
|
||||
jmp .loop ; else, loop
|
||||
|
||||
.comma:                             ; current byte is a comma
    ; report the find; rsi is in use by the caller of this path, so it is
    ; preserved around the print calls
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_comma
    call print
    pop rsi

    test byte [.expecting], E_COMMA ; make sure a comma was expected
    jz .unexpected_comma            ; if not, error

    inc rdi                         ; step past the comma
    ; after a comma only whitespace or another operand may follow; the store
    ; needs an explicit size because neither operand implies one
    mov byte [.expecting], E_WHITESPACE | E_OPERAND
    jmp .loop                       ; and loop
|
||||
|
||||
.newline_mk_flags:
|
||||
push rsi
|
||||
mov rsi, .found
|
||||
call print.debug
|
||||
mov rsi, .msg_newline
|
||||
call print
|
||||
pop rsi
|
||||
|
||||
test byte [.expecting], E_NEWLINE ; make sure a newline was expected
|
||||
jz .unexpected_newline ; if not, error
|
||||
|
||||
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
|
||||
|
||||
inc rdi
|
||||
jmp .loop
|
||||
|
||||
.operator:
|
||||
push rsi
|
||||
mov rsi, .found
|
||||
call print.debug
|
||||
mov rsi, .msg_operator
|
||||
call print
|
||||
pop rsi
|
||||
.operator_loop:
|
||||
mov dl, [rdi] ; next byte
|
||||
|
||||
cmp dl, " "
|
||||
je .operator_break
|
||||
cmp dl, 0x0A
|
||||
je .operator_break
|
||||
cmp dl, ";"
|
||||
je .operator_break
|
||||
|
||||
inc rdi ; inc byte counter
|
||||
jmp .operator_loop ; and loop
|
||||
.operator_break:
|
||||
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
|
||||
jmp .loop
|
||||
|
||||
.operand:
|
||||
push rsi
|
||||
mov rsi, .found
|
||||
call print.debug
|
||||
mov rsi, .msg_operand
|
||||
call print
|
||||
pop rsi
|
||||
test byte [.expecting], E_OPERAND ; make sure an operand was expected
|
||||
jz .unexpected_operand ; if not, error
|
||||
.operand_loop:
|
||||
mov dl, [rdi]
|
||||
cmp dl, ","
|
||||
je .operand_break
|
||||
cmp dl, 0x0A
|
||||
je .operand_break
|
||||
cmp dl, 0x00
|
||||
je .operand_break
|
||||
inc rdi
|
||||
jmp .operand_loop
|
||||
.operand_break:
|
||||
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
|
||||
jmp .loop
|
||||
|
||||
.break:
|
||||
mov rsi, rdi ; length of token
|
||||
ret
|
||||
|
||||
pop rdi
|
||||
; state
|
||||
|
||||
push rsi
|
||||
call identify_token
|
||||
pop rsi
|
||||
mov rdx, rsi ; length
|
||||
ret
|
||||
.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
|
||||
|
||||
.unexpected_whitespace:
|
||||
mov rsi, .err_unexpected
|
||||
call print.error
|
||||
mov rsi, .msg_whitespace
|
||||
call print
|
||||
jmp halt
|
||||
.unexpected_comment:
|
||||
mov rsi, .err_unexpected
|
||||
call print.error
|
||||
mov rsi, .msg_comment
|
||||
call print
|
||||
jmp halt
|
||||
.unexpected_newline:
|
||||
mov rsi, .err_unexpected
|
||||
call print.error
|
||||
mov rsi, .msg_newline
|
||||
call print
|
||||
jmp halt
|
||||
.unexpected_comma:
|
||||
mov rsi, .err_unexpected
|
||||
call print.error
|
||||
mov rsi, .msg_comma
|
||||
call print
|
||||
jmp halt
|
||||
.unexpected_operand:
|
||||
mov rsi, .err_unexpected
|
||||
call print.error
|
||||
mov rsi, .msg_operand
|
||||
call print
|
||||
jmp halt
|
||||
.err_unexpected db "unexpected ", 0x00
|
||||
.found db "found ", 0x00
|
||||
.msg_whitespace db "whitespace.", 0x0A, 0x00
|
||||
.msg_comment db "comment.", 0x0A, 0x00
|
||||
.msg_newline db "newline.", 0x0A, 0x00
|
||||
.msg_comma db "comma.", 0x0A, 0x00
|
||||
.msg_operator db "operator.", 0x0A, 0x00
|
||||
.msg_operand db "operand.", 0x0A, 0x00
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
; utilities
|
||||
@@ -789,7 +742,7 @@ halt:
|
||||
; elemb
|
||||
;
|
||||
; description:
|
||||
; checks if given byte is element of the specified list
|
||||
; checks if given byte is element of the specified list.
|
||||
;
|
||||
; parameters:
|
||||
; rdi = size of list
|
||||
@@ -872,216 +825,9 @@ clear_output_arena:
|
||||
; data
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
tokens:
|
||||
.by_name_1:
|
||||
db "["
|
||||
dw 0x0051
|
||||
db "]"
|
||||
dw 0x0052
|
||||
db "+"
|
||||
dw 0x0062
|
||||
db "-"
|
||||
dw 0x0063
|
||||
db "*"
|
||||
dw 0x0064
|
||||
db "/"
|
||||
dw 0x0065
|
||||
.by_name_2:
|
||||
db "r8"
|
||||
dw 0x0008
|
||||
db "r9"
|
||||
dw 0x0009
|
||||
db "ax"
|
||||
dw 0x0020
|
||||
db "bx"
|
||||
dw 0x0021
|
||||
db "cx"
|
||||
dw 0x0022
|
||||
db "dx"
|
||||
dw 0x0023
|
||||
db "si"
|
||||
dw 0x0024
|
||||
db "di"
|
||||
dw 0x0025
|
||||
db "sp"
|
||||
dw 0x0026
|
||||
db "bp"
|
||||
dw 0x0027
|
||||
db "al"
|
||||
dw 0x0030
|
||||
db "bl"
|
||||
dw 0x0031
|
||||
db "cl"
|
||||
dw 0x0032
|
||||
db "dl"
|
||||
dw 0x0033
|
||||
db "ah"
|
||||
dw 0x0040
|
||||
db "bh"
|
||||
dw 0x0041
|
||||
db "ch"
|
||||
dw 0x0042
|
||||
db "dh"
|
||||
dw 0x0043
|
||||
db "cs"
|
||||
dw 0x0044
|
||||
db "ds"
|
||||
dw 0x0045
|
||||
db "es"
|
||||
dw 0x0046
|
||||
db "fs"
|
||||
dw 0x0047
|
||||
db "gs"
|
||||
dw 0x0048
|
||||
db "ss"
|
||||
dw 0x0049
|
||||
db "je"
|
||||
dw 0x005C
|
||||
db "jg"
|
||||
dw 0x005F
|
||||
db "jl"
|
||||
dw 0x0061
|
||||
.by_name_3:
|
||||
db "rax"
|
||||
dw 0x0000
|
||||
db "rbx"
|
||||
dw 0x0001
|
||||
db "rcx"
|
||||
dw 0x0002
|
||||
db "rdx"
|
||||
dw 0x0003
|
||||
db "rsi"
|
||||
dw 0x0004
|
||||
db "rdi"
|
||||
dw 0x0005
|
||||
db "rsp"
|
||||
dw 0x0006
|
||||
db "rbp"
|
||||
dw 0x0007
|
||||
db "r10"
|
||||
dw 0x000A
|
||||
db "r11"
|
||||
dw 0x000B
|
||||
db "r12"
|
||||
dw 0x000C
|
||||
db "r13"
|
||||
dw 0x000D
|
||||
db "r14"
|
||||
dw 0x000E
|
||||
db "r15"
|
||||
dw 0x000F
|
||||
db "eax"
|
||||
dw 0x0010
|
||||
db "ebx"
|
||||
dw 0x0011
|
||||
db "ecx"
|
||||
dw 0x0012
|
||||
db "edx"
|
||||
dw 0x0013
|
||||
db "esi"
|
||||
dw 0x0014
|
||||
db "edi"
|
||||
dw 0x0015
|
||||
db "esp"
|
||||
dw 0x0016
|
||||
db "ebp"
|
||||
dw 0x0017
|
||||
db "r8d"
|
||||
dw 0x0018
|
||||
db "r9d"
|
||||
dw 0x0019
|
||||
db "r8w"
|
||||
dw 0x0028
|
||||
db "r9w"
|
||||
dw 0x0029
|
||||
db "sil"
|
||||
dw 0x0034
|
||||
db "dil"
|
||||
dw 0x0035
|
||||
db "spl"
|
||||
dw 0x0036
|
||||
db "bpl"
|
||||
dw 0x0037
|
||||
db "r8b"
|
||||
dw 0x0038
|
||||
db "r9b"
|
||||
dw 0x0039
|
||||
db "cr0"
|
||||
dw 0x004A
|
||||
db "cr2"
|
||||
dw 0x004B
|
||||
db "cr3"
|
||||
dw 0x004C
|
||||
db "cr4"
|
||||
dw 0x004D
|
||||
db "cr8"
|
||||
dw 0x004E
|
||||
db "hlt"
|
||||
dw 0x004F
|
||||
db "xor"
|
||||
dw 0x0053
|
||||
db "inc"
|
||||
dw 0x0054
|
||||
db "dec"
|
||||
dw 0x0055
|
||||
db "mov"
|
||||
dw 0x0056
|
||||
db "add"
|
||||
dw 0x0057
|
||||
db "sub"
|
||||
dw 0x0058
|
||||
db "ret"
|
||||
dw 0x005A
|
||||
db "cmp"
|
||||
dw 0x005B
|
||||
db "jne"
|
||||
dw 0x005D
|
||||
db "jge"
|
||||
dw 0x005E
|
||||
db "jle"
|
||||
dw 0x0060
|
||||
.by_name_4:
|
||||
db "r10d"
|
||||
dw 0x001A
|
||||
db "r11d"
|
||||
dw 0x001B
|
||||
db "r12d"
|
||||
dw 0x001C
|
||||
db "r13d"
|
||||
dw 0x001D
|
||||
db "r14d"
|
||||
dw 0x001E
|
||||
db "r15d"
|
||||
dw 0x001F
|
||||
db "r10w"
|
||||
dw 0x002A
|
||||
db "r11w"
|
||||
dw 0x002B
|
||||
db "r12w"
|
||||
dw 0x002C
|
||||
db "r13w"
|
||||
dw 0x002D
|
||||
db "r14w"
|
||||
dw 0x002E
|
||||
db "r15w"
|
||||
dw 0x002F
|
||||
db "r10b"
|
||||
dw 0x003A
|
||||
db "r11b"
|
||||
dw 0x003B
|
||||
db "r12b"
|
||||
dw 0x003C
|
||||
db "r13b"
|
||||
dw 0x003D
|
||||
db "r14b"
|
||||
dw 0x003E
|
||||
db "r15b"
|
||||
dw 0x003F
|
||||
db "int3"
|
||||
dw 0x0050
|
||||
db "call"
|
||||
dw 0x0059
|
||||
.by_name_5:
|
||||
.by_id:
|
||||
dw 0x0010 ; eax
|
||||
db 0x02 ; type: register
|
||||
@@ -1114,6 +860,202 @@ tokens:
|
||||
db 0x01 ; type: operator
|
||||
db 0x00 ; # operands
|
||||
.by_id_end:
|
||||
.operators:
|
||||
dd "je"
|
||||
dw 0x005C
|
||||
dd "jg"
|
||||
dw 0x005F
|
||||
dd "jl"
|
||||
dw 0x0061
|
||||
dd "hlt"
|
||||
dw 0x004F
|
||||
dd "xor"
|
||||
dw 0x0053
|
||||
dd "inc"
|
||||
dw 0x0054
|
||||
dd "dec"
|
||||
dw 0x0055
|
||||
dd "mov"
|
||||
dw 0x0056
|
||||
dd "add"
|
||||
dw 0x0057
|
||||
dd "sub"
|
||||
dw 0x0058
|
||||
dd "ret"
|
||||
dw 0x005A
|
||||
dd "cmp"
|
||||
dw 0x005B
|
||||
dd "jne"
|
||||
dw 0x005D
|
||||
dd "jge"
|
||||
dw 0x005E
|
||||
dd "jle"
|
||||
dw 0x0060
|
||||
dd "int3"
|
||||
dw 0x0050
|
||||
dd "call"
|
||||
dw 0x0059
|
||||
.operators_end:
|
||||
.registers:
|
||||
dd "r8"
|
||||
dw 0x0008
|
||||
dd "r9"
|
||||
dw 0x0009
|
||||
dd "ax"
|
||||
dw 0x0020
|
||||
dd "bx"
|
||||
dw 0x0021
|
||||
dd "cx"
|
||||
dw 0x0022
|
||||
dd "dx"
|
||||
dw 0x0023
|
||||
dd "si"
|
||||
dw 0x0024
|
||||
dd "di"
|
||||
dw 0x0025
|
||||
dd "sp"
|
||||
dw 0x0026
|
||||
dd "bp"
|
||||
dw 0x0027
|
||||
dd "al"
|
||||
dw 0x0030
|
||||
dd "bl"
|
||||
dw 0x0031
|
||||
dd "cl"
|
||||
dw 0x0032
|
||||
dd "dl"
|
||||
dw 0x0033
|
||||
dd "ah"
|
||||
dw 0x0040
|
||||
dd "bh"
|
||||
dw 0x0041
|
||||
dd "ch"
|
||||
dw 0x0042
|
||||
dd "dh"
|
||||
dw 0x0043
|
||||
dd "cs"
|
||||
dw 0x0044
|
||||
dd "ds"
|
||||
dw 0x0045
|
||||
dd "es"
|
||||
dw 0x0046
|
||||
dd "fs"
|
||||
dw 0x0047
|
||||
dd "gs"
|
||||
dw 0x0048
|
||||
dd "ss"
|
||||
dw 0x0049
|
||||
dd "rax"
|
||||
dw 0x0000
|
||||
dd "rbx"
|
||||
dw 0x0001
|
||||
dd "rcx"
|
||||
dw 0x0002
|
||||
dd "rdx"
|
||||
dw 0x0003
|
||||
dd "rsi"
|
||||
dw 0x0004
|
||||
dd "rdi"
|
||||
dw 0x0005
|
||||
dd "rsp"
|
||||
dw 0x0006
|
||||
dd "rbp"
|
||||
dw 0x0007
|
||||
dd "r10"
|
||||
dw 0x000A
|
||||
dd "r11"
|
||||
dw 0x000B
|
||||
dd "r12"
|
||||
dw 0x000C
|
||||
dd "r13"
|
||||
dw 0x000D
|
||||
dd "r14"
|
||||
dw 0x000E
|
||||
dd "r15"
|
||||
dw 0x000F
|
||||
dd "eax"
|
||||
dw 0x0010
|
||||
dd "ebx"
|
||||
dw 0x0011
|
||||
dd "ecx"
|
||||
dw 0x0012
|
||||
dd "edx"
|
||||
dw 0x0013
|
||||
dd "esi"
|
||||
dw 0x0014
|
||||
dd "edi"
|
||||
dw 0x0015
|
||||
dd "esp"
|
||||
dw 0x0016
|
||||
dd "ebp"
|
||||
dw 0x0017
|
||||
dd "r8d"
|
||||
dw 0x0018
|
||||
dd "r9d"
|
||||
dw 0x0019
|
||||
dd "r8w"
|
||||
dw 0x0028
|
||||
dd "r9w"
|
||||
dw 0x0029
|
||||
dd "sil"
|
||||
dw 0x0034
|
||||
dd "dil"
|
||||
dw 0x0035
|
||||
dd "spl"
|
||||
dw 0x0036
|
||||
dd "bpl"
|
||||
dw 0x0037
|
||||
dd "r8b"
|
||||
dw 0x0038
|
||||
dd "r9b"
|
||||
dw 0x0039
|
||||
dd "cr0"
|
||||
dw 0x004A
|
||||
dd "cr2"
|
||||
dw 0x004B
|
||||
dd "cr3"
|
||||
dw 0x004C
|
||||
dd "cr4"
|
||||
dw 0x004D
|
||||
dd "cr8"
|
||||
dw 0x004E
|
||||
dd "r10d"
|
||||
dw 0x001A
|
||||
dd "r11d"
|
||||
dw 0x001B
|
||||
dd "r12d"
|
||||
dw 0x001C
|
||||
dd "r13d"
|
||||
dw 0x001D
|
||||
dd "r14d"
|
||||
dw 0x001E
|
||||
dd "r15d"
|
||||
dw 0x001F
|
||||
dd "r10w"
|
||||
dw 0x002A
|
||||
dd "r11w"
|
||||
dw 0x002B
|
||||
dd "r12w"
|
||||
dw 0x002C
|
||||
dd "r13w"
|
||||
dw 0x002D
|
||||
dd "r14w"
|
||||
dw 0x002E
|
||||
dd "r15w"
|
||||
dw 0x002F
|
||||
dd "r10b"
|
||||
dw 0x003A
|
||||
dd "r11b"
|
||||
dw 0x003B
|
||||
dd "r12b"
|
||||
dw 0x003C
|
||||
dd "r13b"
|
||||
dw 0x003D
|
||||
dd "r14b"
|
||||
dw 0x003E
|
||||
dd "r15b"
|
||||
dw 0x003F
|
||||
.registers_end:
|
||||
|
||||
opcodes:
|
||||
.by_id:
|
||||
@@ -1139,11 +1081,14 @@ msg_halt db "halted.", 0x0A, 0x00
|
||||
|
||||
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
|
||||
|
||||
whitespace_2 db " ", 0x0D
|
||||
|
||||
; test program
|
||||
program:
|
||||
db "xor eax, eax", 0x0A
|
||||
db "inc rax", 0x0A
|
||||
db "inc rax ; inline comment", 0x0A
|
||||
db "; one line comment", 0x0A
|
||||
db "mov [ rax ], rdx", 0x0A
|
||||
db "hlt", 0x0A
|
||||
db "hlt"
|
||||
db 0x00 ; just for the sake of being able to print it, I made it a string
|
||||
.size db $ - program - 1
|
||||
.size db $ - program
|
||||
|
||||
@@ -16,12 +16,6 @@ run_tests:
|
||||
call clear_test_arena
|
||||
call test_elemb
|
||||
|
||||
call clear_test_arena
|
||||
call test_identify_token
|
||||
|
||||
call clear_test_arena
|
||||
call test_identify_next_token
|
||||
|
||||
call clear_test_arena
|
||||
call test_get_tte_type
|
||||
|
||||
@@ -101,202 +95,6 @@ test_elemb:
|
||||
ret
|
||||
.msg db "test_elemb...", 0x00
|
||||
|
||||
; ------------------------------------------------------------------------------
; test_identify_token
;
; description:
; runs identify_token against known and unknown tokens of length 1 to 4.
; Calls print with msg_pass when every id matches, or with msg_fail at the
; first mismatch.
; ------------------------------------------------------------------------------

; one test case: place the token in the test arena, identify it, and leave
; through .fail of the enclosing test when ax is not the expected id
; %1 = store size (byte/word/dword)
; %2 = token text
; %3 = token length in bytes
; %4 = expected id
%macro CHECK_IDENTIFY_TOKEN 4
    mov %1 [TEST_ARENA_ADDR], %2
    mov rdi, TEST_ARENA_ADDR
    mov rsi, %3
    call identify_token
    cmp ax, %4
    jne .fail
%endmacro

test_identify_token:
    mov rsi, .msg
    call print.test

    ; length 1: one known token, one unknown
    CHECK_IDENTIFY_TOKEN byte, "*", 1, 0x0064
    CHECK_IDENTIFY_TOKEN byte, " ", 1, 0xFFFF

    ; length 2: one known token, one unknown
    CHECK_IDENTIFY_TOKEN word, "sp", 2, 0x0026
    CHECK_IDENTIFY_TOKEN word, "QQ", 2, 0xFFFF

    ; length 3: two known tokens, one unknown
    CHECK_IDENTIFY_TOKEN dword, "rax", 3, 0x0000
    CHECK_IDENTIFY_TOKEN dword, "cr0", 3, 0x004A
    CHECK_IDENTIFY_TOKEN dword, "r16", 3, 0xFFFF

    ; length 4: two known tokens, one unknown
    CHECK_IDENTIFY_TOKEN dword, "r10d", 4, 0x001A
    CHECK_IDENTIFY_TOKEN dword, "r15b", 4, 0x003F
    CHECK_IDENTIFY_TOKEN dword, "r15q", 4, 0xFFFF

.pass:
    mov rsi, msg_pass
    call print
    ret

.fail:
    mov rsi, msg_fail
    call print
    ret

.msg db "test_identify_token...", 0x00
|
||||
|
||||
; ------------------------------------------------------------------------------
; test_identify_next_token
;
; description:
; runs identify_next_token against known and unknown tokens that are followed
; by a space. Calls print with msg_pass when every id matches, or with
; msg_fail at the first mismatch. Only the id in ax is checked.
; ------------------------------------------------------------------------------

; one test case for text that fits a single store of up to 4 bytes; leaves
; through .fail of the enclosing test when ax is not the expected id
; %1 = store size (word/dword)
; %2 = token text including its trailing terminator
; %3 = expected id
%macro CHECK_IDENTIFY_NEXT_TOKEN 3
    mov %1 [TEST_ARENA_ADDR], %2
    mov rdi, TEST_ARENA_ADDR
    call identify_next_token
    cmp ax, %3
    jne .fail
%endmacro

; same check for a 4-byte token, whose terminating space needs a fifth byte
; %1 = token text (4 bytes)
; %2 = expected id
%macro CHECK_IDENTIFY_NEXT_TOKEN4 2
    mov dword [TEST_ARENA_ADDR], %1
    mov byte [TEST_ARENA_ADDR + 4], " "
    mov rdi, TEST_ARENA_ADDR
    call identify_next_token
    cmp ax, %2
    jne .fail
%endmacro

test_identify_next_token:
    mov rsi, .msg
    call print.test

    ; length 1: one known token, one unknown
    CHECK_IDENTIFY_NEXT_TOKEN word, "* ", 0x0064
    CHECK_IDENTIFY_NEXT_TOKEN word, " ", 0xFFFF

    ; length 2: one known token, one unknown
    CHECK_IDENTIFY_NEXT_TOKEN dword, "sp ", 0x0026
    CHECK_IDENTIFY_NEXT_TOKEN dword, "QQ ", 0xFFFF

    ; length 3: two known tokens, one unknown
    CHECK_IDENTIFY_NEXT_TOKEN dword, "rax ", 0x0000
    CHECK_IDENTIFY_NEXT_TOKEN dword, "cr0 ", 0x004A
    CHECK_IDENTIFY_NEXT_TOKEN dword, "r16 ", 0xFFFF

    ; length 4: two known tokens, one unknown
    CHECK_IDENTIFY_NEXT_TOKEN4 "r10d", 0x001A
    CHECK_IDENTIFY_NEXT_TOKEN4 "r15b", 0x003F
    CHECK_IDENTIFY_NEXT_TOKEN4 "r15q", 0xFFFF

.pass:
    mov rsi, msg_pass
    call print
    ret

.fail:
    mov rsi, msg_fail
    call print
    ret

.msg db "test_identify_next_token...", 0x00
|
||||
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
; test_get_tte_type
|
||||
;
|
||||
|
||||
Reference in New Issue
Block a user