tokenise labels and constants! Now assembly highkey fails but ok
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
LOAD_ADDR equ 0x7C00
|
||||
|
||||
KERNEL_START equ 2 ; first sector on disk to load kernel from; 1 indexed
|
||||
KERNEL_SIZE equ 16 ; length of kernel in sectors
|
||||
KERNEL_SIZE equ 32 ; length of kernel in sectors
|
||||
KERNEL_LOAD_ADDR_ES equ 0x1000 ; kernel to be loaded at es * 0x10 + 0x0000
|
||||
|
||||
PAGE_TABLE_LOAD_ADDR equ 0x1000 ; start of page table; 4 * pt size
|
||||
|
||||
109
twasm/README.md
109
twasm/README.md
@@ -22,11 +22,14 @@ tokeniser
|
||||
------------------------
|
||||
byte(s) -> next byte(s)
|
||||
------------------------
|
||||
Newline -> Newline
|
||||
Newline -> Label
|
||||
-> Newline
|
||||
-> Komment
|
||||
-> Operator
|
||||
-> Directive
|
||||
|
||||
Label -> Newline
|
||||
|
||||
Komment -> Newline
|
||||
|
||||
Operator -> Newline
|
||||
@@ -45,37 +48,6 @@ Directive -> Newline
|
||||
------------------------
|
||||
```
|
||||
|
||||
not yet implemented:
|
||||
|
||||
```
|
||||
------------------------
|
||||
operand parser
|
||||
------------------------
|
||||
byte(s) -> next byte(s)
|
||||
------------------------
|
||||
START -> '['
|
||||
-> Register
|
||||
-> Constant
|
||||
|
||||
'[' -> Register
|
||||
-> Constant
|
||||
|
||||
']' -> END
|
||||
|
||||
Register -> IF #[, ']'
|
||||
-> Operator
|
||||
|
||||
Constant -> IF #[, ']'
|
||||
-> Operator
|
||||
|
||||
Operator -> IF NOT #R, Register
|
||||
-> Constant
|
||||
------------------------
|
||||
:R: = whether a register has been found
|
||||
:[: = whether a '[' has been found
|
||||
------------------------
|
||||
```
|
||||
|
||||
### memory map
|
||||
|
||||
```
|
||||
@@ -88,6 +60,10 @@ Operator -> IF NOT #R, Register
|
||||
+------ 0x00060000 ------+
|
||||
| test arena |
|
||||
+------ 0x00050000 ------+
|
||||
| label table |
|
||||
+------ 0x00040000 ------+
|
||||
| awaiting label table |
|
||||
+------ 0x00030000 ------+
|
||||
| stack (rsp) |
|
||||
+------------------------+
|
||||
| input |
|
||||
@@ -105,6 +81,7 @@ each word represents a token on the token table.
|
||||
each token gets loaded into the token table with the following form:
|
||||
|
||||
```
|
||||
2 bytes
|
||||
+----------+
|
||||
| 15 0 |
|
||||
+----------+
|
||||
@@ -112,6 +89,40 @@ each token gets loaded into the token table with the following form:
|
||||
+----------+
|
||||
```
|
||||
|
||||
#### label table (LT)
|
||||
|
||||
label definitions are stored in and recalled from this table. The memory addresses are relative to the start of the program.
|
||||
|
||||
```
|
||||
16 bytes
|
||||
+---------+
|
||||
| 127 64 |
|
||||
+---------+
|
||||
| address |
|
||||
+---------+
|
||||
| 63 0 |
|
||||
+---------+
|
||||
| hash |
|
||||
+---------+
|
||||
```
|
||||
|
||||
#### awaiting label table (ALT)
|
||||
|
||||
forward references are stored in this table to be filled in after assembly is otherwise complete. The memory addresses are relative to the start of the program
|
||||
|
||||
```
|
||||
16 bytes
|
||||
+----------+----------+------------------+---------+
|
||||
| 127 105 | 104 104 | 103 96 | 95 64 |
|
||||
+----------+----------+------------------+---------+
|
||||
| reserved | abs flag | # bytes reserved | address |
|
||||
+----------+----------+------------------+---------+
|
||||
| 63 0 |
|
||||
+--------------------------------------------------+
|
||||
| hash |
|
||||
+--------------------------------------------------+
|
||||
```
|
||||
|
||||
### internal data structures
|
||||
|
||||
#### `tokens.[operators|registers]`
|
||||
@@ -121,6 +132,7 @@ contains tokens by their type. Intended to be searched by token name to get the
|
||||
each entry is in the following form:
|
||||
|
||||
```
|
||||
6 bytes
|
||||
+----------+--------------------------------+
|
||||
| 47 32 | 31 0 |
|
||||
+----------+--------------------------------+
|
||||
@@ -129,26 +141,16 @@ each entry is in the following form:
|
||||
|
||||
```
|
||||
|
||||
example implementation:
|
||||
|
||||
```nasm
|
||||
tokens
|
||||
.registers:
|
||||
dd "r8"
|
||||
dw 0x0008
|
||||
.by_name3: ; this is required for futureproofness; the caller can use this to
|
||||
; find the size of registers.by_name2
|
||||
```
|
||||
|
||||
note that tokens longer than 4 bytes are problematic :/
|
||||
|
||||
#### `tokens.by_id`
|
||||
|
||||
contains some tokens with their metadata. Some tokens have embedded information (`0x10XX` for instance). Those will not have entries in this table, being handled instead inside the assemble function itself.
|
||||
contains some tokens with their metadata. Some tokens have embedded information (`0x10XX` for instance). Those do not have entries in this table, being handled instead inside the assemble function itself.
|
||||
|
||||
metadata about some tokens in the following form:
|
||||
|
||||
```
|
||||
4 bytes
|
||||
+----------------+----------+-------+----------+
|
||||
| 31 24 | 23 20 | 19 16 | 15 0 |
|
||||
+----------------+----------+-------+----------+
|
||||
@@ -168,6 +170,7 @@ the `type` hex digit is defined as the following:
|
||||
type metadata for the different types is as follows:
|
||||
|
||||
```
|
||||
1 byte
|
||||
+----------+
|
||||
| type 0x0 |
|
||||
+----------+
|
||||
@@ -178,6 +181,7 @@ type metadata for the different types is as follows:
|
||||
```
|
||||
|
||||
```
|
||||
1 byte
|
||||
+-------------------------------+
|
||||
| type 0x1 |
|
||||
+----------+--------------------+
|
||||
@@ -188,6 +192,7 @@ type metadata for the different types is as follows:
|
||||
```
|
||||
|
||||
```
|
||||
1 byte
|
||||
+------------------------------+
|
||||
| type 0x2 |
|
||||
+----------+-----------+-------+
|
||||
@@ -210,6 +215,7 @@ type metadata for the different types is as follows:
|
||||
entries are as follows:
|
||||
|
||||
```
|
||||
16 bytes
|
||||
+------------------------------+
|
||||
| 0 operand operators |
|
||||
+------------------------------+
|
||||
@@ -230,6 +236,7 @@ entries are as follows:
|
||||
| reserved | opcode | token ID |
|
||||
+----------+--------+----------+
|
||||
|
||||
16 bytes
|
||||
+-------------------------------------------------------------+
|
||||
| 1 operand operators |
|
||||
+-------------------------------------------------------------+
|
||||
@@ -252,6 +259,7 @@ entries are as follows:
|
||||
| | dst=r/m | |
|
||||
+----------+---------------+----------------------------------+
|
||||
|
||||
16 bytes
|
||||
+----------------------------------------------+
|
||||
| 2 operand operators |
|
||||
+----------------------------------------------+
|
||||
@@ -389,14 +397,23 @@ supported tokens are listed below
|
||||
| ret | 0x005A | |
|
||||
| cmp | 0x005B | |
|
||||
| | 0x10XX | some memory address; `XX` is as specified below |
|
||||
| | 0xFEXX | used to pass some raw value `XX` in place of a token id |
|
||||
| | 0x20XX | some constant; `XX` is as specified below |
|
||||
| | 0x3XXX | some label definition; `XXX` is its entry index in the label table |
|
||||
| | 0x4XXX | some label reference; `XXX` is its entry index in the label table |
|
||||
| | 0xFEXX | used to pass some raw value `XX` in place of a token id to a couple of functions that mention this as a feature. If the function doesn't mention it, it will lead to undefined behaviour |
|
||||
| | 0xFFFF | unrecognised token |
|
||||
|
||||
values of `XX` in `0x10XX`:
|
||||
|
||||
| XX | description |
|
||||
|------|-------------|
|
||||
| 0x00 | following byte is the token ID of some register |
|
||||
| 0x00 | following word is the token ID of some register |
|
||||
|
||||
values of `XX` in `0x20XX`:
|
||||
|
||||
| XX | description |
|
||||
|------|-------------|
|
||||
| 0x00 | following 8 bytes are the constant's value |
|
||||
|
||||
### example program
|
||||
|
||||
|
||||
@@ -2,16 +2,21 @@
|
||||
|
||||
LOAD_ADDR equ 0x00010000 ; address this program is loaded at
|
||||
|
||||
STACK_ADDR equ 0x00030000 ; address to put the 64-bit stack at
|
||||
|
||||
AWAITING_LABEL_TABLE_ADDR equ 0x00030000 ; address to store pending labels at
|
||||
AWAITING_LABEL_TABLE_SIZE equ 0x00010000
|
||||
LABEL_TABLE_ADDR equ 0x00040000 ; address to store labels at
|
||||
LABEL_TABLE_SIZE equ 0x00010000
|
||||
|
||||
TEST_ARENA_ADDR equ 0x00050000 ; address to run tests at
|
||||
TEST_ARENA_SIZE equ 0x1000 ; maximum size tests can use
|
||||
TEST_ARENA_SIZE equ 0x00010000 ; maximum size tests can use
|
||||
|
||||
TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at
|
||||
TOKEN_TABLE_SIZE equ 0x1000 ; max length of table
|
||||
TOKEN_TABLE_SIZE equ 0x00010000 ; max length of table
|
||||
|
||||
OUTPUT_ADDR equ 0x00070000 ; address of outputed binary
|
||||
OUTPUT_SIZE equ 0x1000 ; max length of outputed binary
|
||||
|
||||
STACK_ADDR equ 0x00060000 ; address to put the 64-bit stack at
|
||||
OUTPUT_SIZE equ 0x00010000 ; max length of outputed binary
|
||||
|
||||
UNRECOGNISED_TOKEN_ID equ 0xFFFF ; id of an unrecognised token
|
||||
UNRECOGNISED_ID_TYPE equ 0x0F ; type of an unrecognised id
|
||||
@@ -27,6 +32,7 @@ E_WHITESPACE equ 1 << 2
|
||||
E_COMMA equ 1 << 3
|
||||
E_OPERATOR equ 1 << 4
|
||||
E_OPERAND equ 1 << 5
|
||||
E_LABEL equ 1 << 6
|
||||
|
||||
[bits 64]
|
||||
[org LOAD_ADDR]
|
||||
@@ -43,6 +49,7 @@ start:
|
||||
call run_tests
|
||||
|
||||
call clear_token_table
|
||||
call clear_label_tables
|
||||
|
||||
mov rdi, program ; -> program
|
||||
mov rsi, [program.size] ; = size of program
|
||||
@@ -98,6 +105,7 @@ assemble:
|
||||
|
||||
cmp al, 0x1 ; check if next tte is an operator
|
||||
je .operator ; if so, handle
|
||||
|
||||
jmp .unexpected_token ; otherwise, fail
|
||||
|
||||
.operator: ; if next tte's type is an operator:
|
||||
@@ -120,7 +128,6 @@ assemble:
|
||||
je .operator_2 ; if so, handle case of two operands
|
||||
|
||||
jmp .unexpected_token
|
||||
|
||||
.operator_0:
|
||||
mov rsi, .msg_operator_0
|
||||
call print.debug
|
||||
@@ -133,7 +140,6 @@ assemble:
|
||||
call .write_byte
|
||||
|
||||
jmp .loop_next_token
|
||||
|
||||
.operator_1:
|
||||
mov rsi, .msg_operator_1
|
||||
call print.debug
|
||||
@@ -167,12 +173,10 @@ assemble:
|
||||
je .operator_1_register
|
||||
|
||||
jmp .unexpected_token
|
||||
|
||||
.operator_1_memory:
|
||||
mov rsi, .msg_operator_1_memory
|
||||
call print.debug
|
||||
jmp .unsupported_memory_access
|
||||
|
||||
.operator_1_register:
|
||||
mov rsi, .msg_operator_1_register
|
||||
call print.debug
|
||||
@@ -213,7 +217,6 @@ assemble:
|
||||
call .write_byte
|
||||
|
||||
jmp .loop_next_token
|
||||
|
||||
.operator_2:
|
||||
mov rsi, .msg_operator_2
|
||||
call print.debug
|
||||
@@ -240,7 +243,6 @@ assemble:
|
||||
je .operator_2_register
|
||||
|
||||
jmp .unexpected_token
|
||||
|
||||
.operator_2_memory:
|
||||
mov rsi, .msg_operator_2_memory
|
||||
call print.debug
|
||||
@@ -319,7 +321,6 @@ assemble:
|
||||
|
||||
cmp al, 11b ; 64 bit
|
||||
je .operator_2_memory_register_64
|
||||
|
||||
.operator_2_memory_register_16:
|
||||
mov al, 0x66
|
||||
call .push_byte
|
||||
@@ -337,7 +338,6 @@ assemble:
|
||||
call .write_byte
|
||||
|
||||
jmp .loop_next_token
|
||||
|
||||
.operator_2_register:
|
||||
mov rsi, .msg_operator_2_register
|
||||
call print.debug
|
||||
@@ -398,7 +398,6 @@ assemble:
|
||||
je .operator_2_register_register ; if so, handle
|
||||
|
||||
jmp .unexpected_token
|
||||
|
||||
.operator_2_register_memory:
|
||||
push rsi
|
||||
mov rsi, .msg_operator_2_register_memory
|
||||
@@ -441,7 +440,6 @@ assemble:
|
||||
call .write_byte
|
||||
|
||||
jmp .loop_next_token
|
||||
|
||||
.operator_2_register_register:
|
||||
push rsi
|
||||
mov rsi, .msg_operator_2_register_register
|
||||
@@ -543,7 +541,6 @@ assemble:
|
||||
|
||||
jmp .operator_2_register_register_continue
|
||||
.operator_2_register_register_continue:
|
||||
|
||||
push rsi
|
||||
mov esi, edi ; si = reg; src tte
|
||||
pop rdi ; di = r/m; dst tte
|
||||
@@ -658,7 +655,7 @@ assemble:
|
||||
call .output_byte
|
||||
mov byte [ecx], 0x00
|
||||
jmp .flush_write_buffer_loop
|
||||
.flush_write_buffer_break
|
||||
.flush_write_buffer_break:
|
||||
mov dword [.buffer_pointer], .buffer
|
||||
pop rax
|
||||
pop rcx
|
||||
@@ -680,6 +677,7 @@ assemble:
|
||||
.msg_operator_2_register db "operator_2_register", 0x0A, 0x00
|
||||
.msg_operator_2_register_memory db "operator_2_register_memory", 0x0A, 0x00
|
||||
.msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00
|
||||
.msg_potential_label db "potential_label", 0x0A, 0x00
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
; get_tte_type
|
||||
@@ -956,9 +954,11 @@ tokenise:
|
||||
pop rsi ; rsi -> last byte of program
|
||||
jnz .skip_byte_whitespace
|
||||
|
||||
test byte [.expecting], E_OPERATOR ; check if an operator is expected
|
||||
test byte [.expecting], E_LABEL ; check if a label is expected
|
||||
jnz .label ; if so, handle it
|
||||
test byte [.expecting], E_OPERATOR ; else, check if an operator is expected
|
||||
jnz .operator ; if so, handle it
|
||||
jmp .operand ; otherwise, handle as an operand
|
||||
jmp .operand ; else, handle as an operand
|
||||
|
||||
.comment:
|
||||
push rsi
|
||||
@@ -1021,11 +1021,72 @@ tokenise:
|
||||
test byte [.expecting], E_NEWLINE ; make sure a newline was expected
|
||||
jz .unexpected_newline ; if not, error
|
||||
|
||||
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
|
||||
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL
|
||||
|
||||
inc rdi
|
||||
jmp .loop
|
||||
|
||||
.label:
|
||||
push rax
|
||||
xor eax, eax ; rax = number of bytes in label
|
||||
.label_loop:
|
||||
mov dl, [rdi + rax] ; next byte
|
||||
cmp dl, ":"
|
||||
je .label_break
|
||||
cmp dl, " "
|
||||
je .label_not_found
|
||||
cmp dl, 0x0A
|
||||
je .label_not_found
|
||||
cmp dl, 0x00
|
||||
je .label_not_found
|
||||
cmp dl, ";"
|
||||
je .label_not_found
|
||||
inc eax ; inc byte counter
|
||||
cmp rdi, rsi
|
||||
jge .break
|
||||
jmp .label_loop
|
||||
.label_break:
|
||||
push rsi
|
||||
mov rsi, .found
|
||||
call print.debug
|
||||
mov rsi, .msg_label
|
||||
call print
|
||||
pop rsi ; rsi -> last byte of program
|
||||
|
||||
push rax
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rsi, rdi ; rsi -> start of string
|
||||
mov rdi, rax ; rdi = size of string
|
||||
call djb2
|
||||
; rax = hash
|
||||
mov rdi, rax ; rdi = hash
|
||||
call add_label_hash
|
||||
; rax = index on label table
|
||||
mov cx, ax
|
||||
and cx, 0x0FFF
|
||||
or cx, 0x3000
|
||||
|
||||
pop rsi ; rsi -> last byte of program
|
||||
pop rdi ; rdi -> current byte of program
|
||||
pop rax ; rax = number of bytes in label
|
||||
|
||||
add rdi, rax ; move on to next byte
|
||||
inc rdi ; move past the colon
|
||||
|
||||
pop rax ; rax = number of tokens processed
|
||||
|
||||
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
||||
inc rax ; the next token
|
||||
|
||||
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE
|
||||
jmp .loop
|
||||
.label_not_found:
|
||||
pop rax ; rax = number of tokens processed
|
||||
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
|
||||
jmp .loop
|
||||
|
||||
.operator:
|
||||
; debug message
|
||||
push rsi
|
||||
@@ -1038,7 +1099,6 @@ tokenise:
|
||||
mov rcx, rax ; rcx = number of tokens processed
|
||||
xor eax, eax ; eax = number of bytes in operator
|
||||
mov [.pending_operator], eax ; zero pending operator
|
||||
|
||||
.operator_loop:
|
||||
; TODO give this its own error
|
||||
|
||||
@@ -1063,7 +1123,6 @@ tokenise:
|
||||
cmp rdi, rsi
|
||||
jge .break
|
||||
jmp .operator_loop ; and loop
|
||||
|
||||
.operator_break:
|
||||
; rax already pushed from .operator
|
||||
push rdi
|
||||
@@ -1097,7 +1156,6 @@ tokenise:
|
||||
push rax
|
||||
push rdi
|
||||
xor eax, eax ; rax = length of operand
|
||||
|
||||
.operand_loop:
|
||||
mov dl, [rdi]
|
||||
|
||||
@@ -1113,7 +1171,6 @@ tokenise:
|
||||
inc rax ; inc length counter
|
||||
inc rdi ; inc byte pointer
|
||||
jmp .operand_loop
|
||||
|
||||
.operand_break:
|
||||
pop rdi ; rdi -> first byte of operand
|
||||
push rdi
|
||||
@@ -1121,9 +1178,11 @@ tokenise:
|
||||
mov rsi, rax ; rsi = length of operand in bytes
|
||||
|
||||
mov cx, ax ; cx = length counter for safe keeping
|
||||
push rcx
|
||||
call evaluate_operand
|
||||
; dl = return code
|
||||
; rax = binary data
|
||||
pop rcx
|
||||
pop rsi
|
||||
pop rdi ; rdi = first byte of operand
|
||||
add di, cx ; rdi = last byte of operand
|
||||
@@ -1131,31 +1190,48 @@ tokenise:
|
||||
pop rax ; rax = number of tokens processed
|
||||
|
||||
; operand is some reg
|
||||
; cx = token ID
|
||||
cmp dl, 0x00
|
||||
; cx = token ID
|
||||
je .operand_register
|
||||
|
||||
; operand is some [reg]
|
||||
; cx = token ID
|
||||
cmp dl, 0x10
|
||||
; cx = token ID
|
||||
je .operand_addr_register
|
||||
|
||||
jmp .unexpected_operand
|
||||
; operand is some constant
|
||||
cmp dl, 0x20
|
||||
; rcx = constant value
|
||||
je .operand_constant
|
||||
|
||||
; cx = token ID
|
||||
; operand is some label
|
||||
cmp dl, 0x30
|
||||
; rcx = index of label in LT
|
||||
je .operand_label
|
||||
|
||||
jmp .unexpected_operand
|
||||
.operand_register:
|
||||
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
||||
inc rax ; another token processed
|
||||
jmp .operand_break_continue
|
||||
|
||||
; cx = token ID
|
||||
.operand_addr_register:
|
||||
mov word [TOKEN_TABLE_ADDR + rax * 2], 0x1000
|
||||
inc rax ; 0x1000: addr reg token, next token is the register
|
||||
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
||||
inc rax ; the register as returned by evaluate_operand
|
||||
jmp .operand_break_continue
|
||||
|
||||
.operand_constant:
|
||||
mov word [TOKEN_TABLE_ADDR + rax * 2], 0x2000
|
||||
inc rax ; another token processed
|
||||
mov [TOKEN_TABLE_ADDR + rax * 2], rcx
|
||||
add rax, 4
|
||||
jmp .operand_break_continue
|
||||
.operand_label:
|
||||
and cx, 0x0FFF
|
||||
or cx, 0x3000
|
||||
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
||||
inc rax
|
||||
jmp .operand_break_continue
|
||||
.operand_break_continue:
|
||||
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
|
||||
jmp .loop
|
||||
@@ -1164,8 +1240,7 @@ tokenise:
|
||||
ret
|
||||
|
||||
; state
|
||||
|
||||
.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
|
||||
.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL
|
||||
|
||||
.unexpected_whitespace:
|
||||
mov rsi, .err_unexpected
|
||||
@@ -1209,6 +1284,7 @@ tokenise:
|
||||
.msg_comment db "comment.", 0x0A, 0x00
|
||||
.msg_newline db "newline.", 0x0A, 0x00
|
||||
.msg_comma db "comma.", 0x0A, 0x00
|
||||
.msg_label db "label.", 0x0A, 0x00
|
||||
.msg_operator db "operator.", 0x0A, 0x00
|
||||
.msg_operand db "operand.", 0x0A, 0x00
|
||||
.pending_operator dd 0 ; the operator token that is pending processing
|
||||
@@ -1220,10 +1296,12 @@ tokenise:
|
||||
; takes the location and length of an operand and evaluates it into binary data
|
||||
; and a return code to interpret the binary data.
|
||||
;
|
||||
; | code | rsi contents | notes |
|
||||
; | code | rax contents | notes |
|
||||
; |------|----------------------|-------|
|
||||
; | 0x00 | token ID of register | reg |
|
||||
; | 0x10 | token ID of register | [reg] |
|
||||
; | 0x20 | constant value | const |
|
||||
; | 0x30 | index of label in LT | label |
|
||||
; | 0xFF | - | error |
|
||||
;
|
||||
; parameters:
|
||||
@@ -1239,6 +1317,7 @@ evaluate_operand:
|
||||
push rdi ; rdi -> start of operand
|
||||
; rsi = size of operand
|
||||
call trim_trailing_whitespace
|
||||
; rax = new size of operand
|
||||
|
||||
pop rdi ; rdi -> first byte of operand
|
||||
mov rsi, rax ; rsi = size of operand w/o trailing whitespace
|
||||
@@ -1249,7 +1328,7 @@ evaluate_operand:
|
||||
cmp byte [rdi], '[' ; case: memory addressing
|
||||
je .address
|
||||
|
||||
jmp .register ; otherwise: register
|
||||
jmp .register ; otherwise: register (or constant, or label)
|
||||
|
||||
.address:
|
||||
cmp byte [rdi + rsi - 1], ']' ; check if address is closed correctly
|
||||
@@ -1262,12 +1341,13 @@ evaluate_operand:
|
||||
cmp dl, 0x10 ; make sure return code isn't another memory reference
|
||||
je .unrecognised ; if it is, fail
|
||||
|
||||
or dl, 0x10 ; flip bit for address return
|
||||
shr edx, 4
|
||||
or dl, 0x10 ; address return
|
||||
ret
|
||||
|
||||
.register:
|
||||
cmp rsi, 4
|
||||
jg .unrecognised
|
||||
jg .constant ; not a register: too long. Maybe constant?
|
||||
push rdi
|
||||
mov edi, [rdi] ; edi = register to be searched
|
||||
|
||||
@@ -1288,18 +1368,47 @@ evaluate_operand:
|
||||
.register3:
|
||||
and edi, 0xFFFFFF
|
||||
.register4:
|
||||
|
||||
call identify_register
|
||||
; ax = register's token ID or UNRECOGNISED_TOKEN_ID
|
||||
pop rdi
|
||||
pop rdi ; rdi -> first byte of operand
|
||||
|
||||
cmp ax, UNRECOGNISED_TOKEN_ID
|
||||
je .unrecognised
|
||||
cmp ax, UNRECOGNISED_TOKEN_ID ; if not a register, constant?
|
||||
je .constant
|
||||
|
||||
mov dl, 0x00
|
||||
ret
|
||||
|
||||
.constant:
|
||||
push rdi
|
||||
push rsi
|
||||
; rdi -> first byte of constant
|
||||
; rsi = size of constant in bytes
|
||||
call evaluate_constant
|
||||
; dl = type of constant
|
||||
; rax = hex value of constant
|
||||
pop rdi ; rdi = size of label in bytes
|
||||
pop rsi ; rsi -> first byte of label
|
||||
|
||||
cmp dl, 0xFF
|
||||
je .label
|
||||
|
||||
; rax = hex value of constant
|
||||
mov dl, 0x20
|
||||
ret
|
||||
|
||||
.label:
|
||||
; rdi = size of label in bytes
|
||||
; rsi -> first byte of label
|
||||
call djb2
|
||||
; rax = hash
|
||||
mov rdi, rax ; rdi = hash
|
||||
call add_label_hash
|
||||
; rax = index in LT of label
|
||||
mov dl, 0x30
|
||||
ret
|
||||
|
||||
.unrecognised:
|
||||
xor eax, eax
|
||||
mov dl, 0xFF
|
||||
ret
|
||||
|
||||
@@ -1353,6 +1462,7 @@ evaluate_constant:
|
||||
je .chr
|
||||
pop rcx
|
||||
|
||||
push rcx ; waste value; .unrecognise expects something on the stack
|
||||
jmp .unrecognised
|
||||
|
||||
.numeric:
|
||||
@@ -1396,12 +1506,10 @@ evaluate_constant:
|
||||
cmp dl, 9 ; if !digit:
|
||||
jg .hex_alpha ; letter
|
||||
jmp .hex_continue ; else loop
|
||||
|
||||
.hex_alpha:
|
||||
sub dl, 7 ; map [('A'-'0')..('F'-'0')] to [0xA..0xF]
|
||||
cmp dl, 0xF ; if not in the range [0xA..0xF]
|
||||
jg .unrecognised ; then unrecognised
|
||||
|
||||
.hex_continue:
|
||||
and dl, 0x0F ; mask
|
||||
or al, dl ; and add newest nibble
|
||||
@@ -1471,14 +1579,12 @@ evaluate_constant:
|
||||
inc rdi
|
||||
|
||||
jmp .chr
|
||||
|
||||
.chr_break:
|
||||
cmp rcx, 1 ; for each [1..rcx]
|
||||
jle .chr_break_for_good
|
||||
rol rax, 8 ; roll left to make up for the roll right earlier
|
||||
dec rcx
|
||||
jmp .chr_break
|
||||
|
||||
.chr_break_for_good:
|
||||
mov dl, [rdi] ; make sure the chr is closed
|
||||
cmp dl, '"'
|
||||
@@ -1492,8 +1598,9 @@ evaluate_constant:
|
||||
|
||||
.unrecognised:
|
||||
pop rdx
|
||||
mov rdx, 0xFF ; unrecognised type
|
||||
mov edx, 0xFF ; unrecognised type
|
||||
ret
|
||||
.msg db "evaluate_constant", 0x0A, 0x00
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
; identify_register
|
||||
@@ -1750,6 +1857,38 @@ trim_trailing_whitespace:
|
||||
mov rax, rsi
|
||||
ret
|
||||
|
||||
; ------------------------------------------------------------------------------
; add_label_hash
;
; description:
; adds a label hash to the label table, or just finds it if already present.
; The table is scanned linearly from its first entry; each entry is 16 bytes
; and the hash is read from / written to the first 8 bytes of the entry. A
; stored value of 0 is treated as an empty slot, so a label whose hash is 0
; cannot be told apart from an empty slot.
;
; if the scan reaches the end of the table without finding the hash or an
; empty slot, nothing is written (so memory past the table is left untouched)
; and the returned index is LABEL_TABLE_SIZE / 16, one past the last valid
; entry.
;
; parameters
; rdi = 64-bit hash to be added
;
; returned
; rax = index in label table
;
; clobbers
; rcx, flags
; ------------------------------------------------------------------------------

add_label_hash:
  xor eax, eax                        ; rax = byte offset of the current entry
.loop:
  cmp rax, LABEL_TABLE_SIZE           ; scanned the whole table?
  jae .done                           ; table full: return without writing
                                      ; (unsigned compare; this is a size)
  mov rcx, [LABEL_TABLE_ADDR + rax]   ; rcx = hash stored in this entry
  ; TODO bug if there's an empty slot before the entry, it won't be found
  test rcx, rcx                       ; empty slot
  jz .claim
  cmp rcx, rdi                        ; already present
  je .done
  add rax, 16                         ; next 16-byte entry
  jmp .loop
.claim:
  mov [LABEL_TABLE_ADDR + rax], rdi   ; store the hash in the empty slot
.done:
  shr rax, 4                          ; rax / 16
  ; rax = index
  ret
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
; clear_token_table
|
||||
;
|
||||
@@ -1764,6 +1903,26 @@ clear_token_table:
|
||||
rep stosd
|
||||
ret
|
||||
|
||||
; ------------------------------------------------------------------------------
; clear_label_tables
;
; description:
; zero-fills two regions, one double word at a time:
;   1. the label table: LABEL_TABLE_SIZE bytes starting at LABEL_TABLE_ADDR
;   2. the awaiting label table: AWAITING_LABEL_TABLE_SIZE bytes starting at
;      AWAITING_LABEL_TABLE_ADDR
;
; takes no parameters and returns nothing. eax, ecx and edi are overwritten.
; ------------------------------------------------------------------------------

clear_label_tables:
  ; pass 1: label table
  mov edi, LABEL_TABLE_ADDR                 ; edi -> first byte to clear
  mov ecx, LABEL_TABLE_SIZE >> 2            ; ecx = size in double words
  xor eax, eax                              ; eax = fill value (zero)
  rep stosd

  ; pass 2: awaiting label table
  mov edi, AWAITING_LABEL_TABLE_ADDR        ; edi -> first byte to clear
  mov ecx, AWAITING_LABEL_TABLE_SIZE >> 2   ; ecx = size in double words
  xor eax, eax                              ; eax = fill value (zero)
  rep stosd
  ret
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
; clear_test_arena
|
||||
;
|
||||
@@ -1772,9 +1931,9 @@ clear_token_table:
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
; zero-fills the test arena: TEST_ARENA_SIZE bytes starting at TEST_ARENA_ADDR,
; written one double word at a time. eax, ecx and edi are overwritten.
clear_test_arena:
  xor eax, eax                    ; value to write
  mov ecx, TEST_ARENA_SIZE / 4    ; number of double words
  mov edi, TEST_ARENA_ADDR        ; address to start
  rep stosd
  ret
|
||||
|
||||
@@ -2341,22 +2500,76 @@ whitespace_2 db " ", 0x0D
|
||||
|
||||
; test program
|
||||
program:
|
||||
db "xor eax, eax", 0x0A
|
||||
db "mov rax, rax", 0x0A
|
||||
db "mov rax, rbx", 0x0A
|
||||
db "mov eax, ebx", 0x0A
|
||||
db "mov ax, bx", 0x0A
|
||||
db "inc rax ; inline comment", 0x0A
|
||||
db "dec rax", 0x0A
|
||||
db "; one line comment", 0x0A
|
||||
db "mov rdx, [rax]", 0x0A
|
||||
db "mov [rax], rdx", 0x0A
|
||||
db "mov [rcx], rbx", 0x0A
|
||||
db "mov rcx, [rbx]", 0x0A
|
||||
db "mov rcx, [ebx]", 0x0A
|
||||
db "mov ecx, [ebx]", 0x0A
|
||||
db "mov cx, [ebx]", 0x0A
|
||||
db "hlt", 0x0A
|
||||
db "print:", 0x0A
|
||||
db " push rdx", 0x0A
|
||||
db " push rax", 0x0A
|
||||
db " push rsi", 0x0A
|
||||
db "", 0x0A
|
||||
db " mov edx, 0x3F8", 0x0A
|
||||
db " .loop:", 0x0A
|
||||
db " mov al, [rsi]", 0x0A
|
||||
db " cmp al, 0x00", 0x0A
|
||||
db " je .done", 0x0A
|
||||
db " out dx, al", 0x0A
|
||||
db " inc rsi", 0x0A
|
||||
db " jmp .loop", 0x0A
|
||||
db " .done:", 0x0A
|
||||
db " pop rsi", 0x0A
|
||||
db " pop rax", 0x0A
|
||||
db " pop rdx", 0x0A
|
||||
db " ret", 0x0A
|
||||
db " .debug:", 0x0A
|
||||
db " push rsi", 0x0A
|
||||
db " mov rsi, .debug_msg", 0x0A
|
||||
db " call print", 0x0A
|
||||
db " pop rsi", 0x0A
|
||||
db " jmp print ; tail call", 0x0A
|
||||
db " .error:", 0x0A
|
||||
db " push rsi", 0x0A
|
||||
db " mov rsi, .error_msg", 0x0A
|
||||
db " call print", 0x0A
|
||||
db " pop rsi", 0x0A
|
||||
db " jmp print ; tail call", 0x0A
|
||||
db " .test:", 0x0A
|
||||
db " push rsi", 0x0A
|
||||
db " mov rsi, .test_msg", 0x0A
|
||||
db " call print", 0x0A
|
||||
db " pop rsi", 0x0A
|
||||
db " jmp print ; tail call", 0x0A
|
||||
db " .warn:", 0x0A
|
||||
db " push rsi", 0x0A
|
||||
db " mov rsi, .warn_msg", 0x0A
|
||||
db " call print", 0x0A
|
||||
db " pop rsi", 0x0A
|
||||
db " jmp print ; tail call", 0x0A
|
||||
db " .debug_msg:", 0x0A
|
||||
db " db 0x1B", 0x0A
|
||||
db ' db "[36m"', 0x0A
|
||||
db ' db "[DEBUG]: "', 0x0A
|
||||
db " db 0x1B", 0x0A
|
||||
db ' db "[0m"', 0x0A
|
||||
db " db 0x00", 0x0A
|
||||
db " .error_msg:", 0x0A
|
||||
db " db 0x1B", 0x0A
|
||||
db ' db "[1;31m"', 0x0A
|
||||
db ' db "[ERROR]: "', 0x0A
|
||||
db " db 0x1B", 0x0A
|
||||
db ' db "[0m"', 0x0A
|
||||
db " db 0x00", 0x0A
|
||||
db " .test_msg:", 0x0A
|
||||
db " db 0x1B", 0x0A
|
||||
db ' db "[1;33m"', 0x0A
|
||||
db ' db "[TEST]: "', 0x0A
|
||||
db " db 0x1B", 0x0A
|
||||
db ' db "[0m"', 0x0A
|
||||
db " db 0x00", 0x0A
|
||||
db " .warn_msg:", 0x0A
|
||||
db " db 0x1B", 0x0A
|
||||
db ' db "[1;35m"', 0x0A
|
||||
db ' db "[WARN]: "', 0x0A
|
||||
db " db 0x1B", 0x0A
|
||||
db ' db "[0m"', 0x0A
|
||||
db " db 0x00", 0x0A
|
||||
.size dq $ - program
|
||||
|
||||
msg_end db "end of the binary ->|", 0x0A, 0x00
|
||||
|
||||
Reference in New Issue
Block a user