clean up some stuff

This commit is contained in:
andromeda
2026-03-25 21:14:34 +01:00
parent 31a438d1ee
commit 2960c1b795
3 changed files with 146 additions and 175 deletions

View File

@@ -160,7 +160,7 @@ the `type` hex digit is defined as the following:
| hex | meaning | examples |
|-----|----------|-|
| 0x0 | ignored | `; this entire comment is 1 token` |
| 0x0 | ignored | |
| 0x1 | operator | `mov`, `hlt` |
| 0x2 | register | `rsp`, `al` |
| 0xF | unknown | any token ID not represented in the lookup table |

View File

@@ -7,8 +7,6 @@ TEST_ARENA_SIZE equ 0x1000 ; maximum size tests can use
TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at
TOKEN_TABLE_SIZE equ 0x1000 ; max length of table
TOKEN_TABLE_ENTRY_SIZE equ 2 ; size of token table entry; things may break
; if this ever changes
OUTPUT_ADDR equ 0x00070000 ; address of outputted binary
OUTPUT_SIZE equ 0x1000 ; max length of outputted binary
@@ -49,11 +47,14 @@ start:
mov rdi, program ; -> program
mov rsi, [program.size] ; = size of program
call tokenise
; rax = number of tokens processed
; rax = number of tokens in token table
mov rdi, rax
push rdi
call clear_output_arena
pop rdi
pop rdi ; rdi = number of tokens in token table
call assemble
jmp halt
@@ -64,8 +65,6 @@ start:
; ------------------------------------------------------------------------------
; assemble
; TODO write tests
; TODO make it work :/ putting the cart before the horse
;
; description:
; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
@@ -85,25 +84,33 @@ start:
; ------------------------------------------------------------------------------
assemble:
xor eax, eax ; rax = number of tokens processed
mov [.tokens_total], edi ; rdi = number of tokens in table
; TODO deal with src=imm and src=imm8
xor eax, eax
mov [.tokens_processed], eax ; eax = number of tokens processed
mov [.tokens_total], edi ; edi = total number of tokens in table
.loop:
xor edi, edi
mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; next tte
call .get_next_tte
; di = tte
call get_tte_type
; al = type
cmp al, 0x1 ; check if next tte is an operator
je .operator ; if so, handle
jmp .unexpected_token ; otherwise, fail
; di = tte of operator
.operator: ; if next tte's type is an operator:
push rdi
; di = tte of operator
call get_tte_typed_metadata
call get_tte_typed_metadata
; al = tte typed metadata
pop rdi
cmp al, UNRECOGNISED_ID_METADATA ; make sure token has metadata on record
je .unexpected_token ; if not, fail
and al, 11b ; mask for # operands
cmp al, 0 ; check if operator has no operands
je .operator_0 ; if so, handle case of no operands
cmp al, 0 ; check if operator has no operands
je .operator_0 ; if so, handle case of no operands
cmp al, 1 ; check if operator has one operand
je .operator_1 ; if so, handle case of one operand
@@ -111,39 +118,25 @@ assemble:
cmp al, 2 ; check if operator has two operands
je .operator_2 ; if so, handle case of two operands
jmp .unexpected_token ; TODO actually check operator type or not first
; if get_tte_typed_metadata happens to return 0, 1,
; or 2 on a non-operator, it doesn't get caught
jmp .unexpected_token
; di = tte of operator
.operator_0:
push rsi
mov rsi, .msg_operator_0
call print.debug
pop rsi
push rdi
push rsi
; di = tte of operator
mov sil, 0b ; standard opcode
mov sil, 0b ; opcode
call get_opcode
; al = opcode
; dl = op flag (none)
; dl = 0x00
call .output_byte
pop rsi
pop rdi
jmp .loop_next_token
; di = tte of operator
.operator_1:
push rsi
mov rsi, .msg_operator_1
call print.debug
pop rsi
push rdi
push rsi
; di = tte of operator
mov sil, 0b ; dst=r/m
call get_opcode
@@ -152,13 +145,10 @@ assemble:
push rdx
call .output_byte
pop rdx ; dl = op flag
pop rsi
pop rdi ; di = tte of operator
call .next_token
jge .break
xor edi, edi
mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte
call .get_next_tte
push rdi
and di, 0xFF00
@@ -178,23 +168,19 @@ assemble:
jmp .loop_next_token
.operator_1_memory:
push rsi
mov rsi, .msg_operator_1_memory
call print.debug
pop rsi
jmp .unsupported_memory_access
.operator_1_register:
push rsi
mov rsi, .msg_operator_1_register
call print.debug
pop rsi
mov si, di ; si = `R/M` tte
and edx, 0xFF
or dx, 0xFE00 ; pass di as direct value
mov di, dx ; di = op flag
mov dl, 11b ; dl = mod bits
or edx, 0xFE00 ; pass di as direct value
mov edi, edx ; di = op flag
mov edx, 11b ; dl = mod bits
call get_ModRM
; al = Mod R/M byte
call .output_byte
@@ -202,17 +188,14 @@ assemble:
jmp .loop_next_token
.operator_2:
push rsi
mov rsi, .msg_operator_2
call print.debug
pop rsi
mov cx, di ; cx = tte of operator
call .next_token
jge .break
xor edi, edi
mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte
call .get_next_tte
push rdi
and di, 0xFF00
@@ -232,57 +215,49 @@ assemble:
jmp .loop_next_token
.operator_2_memory:
push rsi
mov rsi, .msg_operator_2_memory
call print.debug
pop rsi
cmp di, 0x1000 ; check if token is addressing a register
jne .unsupported_memory_access ; if not, unsupported :/
jne .unsupported_memory_access ; if not, unsupported
push rdi
mov di, cx ; di = tte of operator
mov sil, 0 ; dst = r/m
mov edi, ecx ; di = tte of operator
xor esi, esi ; dst=r/m; src=r
call get_opcode
; al = opcode
; dl = op flag
; TODO act accordingly if the op flag is present
call .output_byte
pop rdi
call .next_token
jge .break
xor edi, edi
mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte
call .get_next_tte
mov si, di ; si = dst tte
mov si, di ; si = dst register tte
call .next_token
jge .break
xor edi, edi
mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte
call .get_next_tte
push rdi
and di, 0xFF00
cmp di, 0x1000 ; check if token is a memory address
pop rdi ; di = next tte
je .unsupported_memory_access ; no case of *],[* in asm
je .unsupported_memory_access ; if so, fail; no case of *],[* in asm
; di = next tte
call get_tte_type
; al = type of token
cmp al, 0x02
je .operator_2_memory_register
cmp al, 0x02 ; check if token is a register
je .operator_2_memory_register ; if so, handle
jmp .loop_next_token
jmp .unexpected_token
.operator_2_memory_register:
push rsi
mov rsi, .msg_operator_2_memory_register
call print.debug
pop rsi
pop rsi ; si = r/m
; si = r/m; dst tte
; di = reg; src tte
@@ -294,10 +269,8 @@ assemble:
jmp .loop_next_token
.operator_2_register:
push rsi
mov rsi, .msg_operator_2_register
call print.debug
pop rsi
push rdi
mov di, cx ; di = tte of operator
@@ -307,14 +280,13 @@ assemble:
; dl = op flag
; TODO do something if the op flag is present
call .output_byte
pop rdi
pop rdi ; di = dst tte
mov si, di ; si = dst tte
call .next_token
jge .break
xor edi, edi
mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte
call .get_next_tte
push rdi
and di, 0xFF00
@@ -326,31 +298,30 @@ assemble:
call get_tte_type
; al = type of token
cmp al, 0x02
je .operator_2_register_register
cmp al, 0x02 ; check if token is a register
je .operator_2_register_register ; if so, handle
jmp .loop_next_token
jmp .unexpected_token
.operator_2_register_memory:
push rsi
mov rsi, .msg_operator_2_register_memory
call print.debug
pop rsi
pop rsi ; si = dst tte
cmp di, 0x1000 ; check if token is addressing to a register
jne .unsupported_memory_access ; if not, unsupported :/
jne .unsupported_memory_access ; if not, unsupported
call .next_token
jge .break
xor edi, edi
mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte
call .get_next_tte
; si = `R/M` tte
; di = `reg` tte
; si = r/m; dst tte
; di = reg; src tte
push rsi
mov si, di
pop rdi
mov dl, 00b ; dl = mod bits
mov esi, edi ; si = reg; src tte
pop rdi ; di = r/m; dst tte
mov edx, 00b ; dl = mod bits
call get_ModRM
; al = Mod R/M byte
call .output_byte
@@ -358,15 +329,13 @@ assemble:
jmp .loop_next_token
.operator_2_register_register:
push rsi
mov rsi, .msg_operator_2_register_register
call print.debug
pop rsi
push rsi
mov si, di ; si = reg; src tte
pop rdi ; di = r/m; dst tte
mov dl, 11b ; dl = mod bits
mov esi, edi ; si = reg; src tte
pop rdi ; di = r/m; dst tte
mov edx, 11b ; dl = mod bits
call get_ModRM
; al = Mod R/M byte
call .output_byte
@@ -402,6 +371,13 @@ assemble:
cmp eax, edi
ret
; eax = current entry index in token table
; returns di = next tte
.get_next_tte:
xor edi, edi
mov di, [eax * 2 + TOKEN_TABLE_ADDR]
ret
.tokens_processed dd 0
.tokens_total dd 0
@@ -444,27 +420,27 @@ assemble:
; ------------------------------------------------------------------------------
get_tte_type:
; looks up the token table entry passed in di in tokens.by_id and returns its
; type in al. each tokens.by_id entry is read as 4 bytes: a word ID at +0 and a
; type byte at +2, of which only the low 4 bits are kept. if no ID matches,
; UNRECOGNISED_ID_TYPE (masked to 4 bits) is returned instead.
; clobbers cx; the input register is masked down to its low 16 bits.
;
; NOTE(review): this is a diff rendering. where two near-identical instructions
; appear back to back, the first appears to be the removed form and the second
; its 32-bit replacement; only one of each pair exists in either version.
and rdi, 0xFFFF ; mask input so it behaves as expected
xor eax, eax
and edi, 0xFFFF ; di = token table entry
xor eax, eax ; eax = tokens.by_id index
.loop:
cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range
cmp eax, (tokens.by_id_end - tokens.by_id) / 4 ; index range check
; NOTE(review): `jg` only leaves the loop once the index is greater than the
; entry count, so index == count is still compared below. assuming
; tokens.by_id_end marks the end of the table, that reads 4 bytes past it;
; `jge`/`jae` would stop at the last entry - confirm.
jg .not_found
mov cx, [tokens.by_id + rax * 4] ; next entry in tokens.by_id
mov cx, [tokens.by_id + eax * 4] ; next entry in tokens.by_id
cmp cx, di
je .found
inc rax
inc eax
jmp .loop
.not_found:
; no ID matched: return the "unrecognised" type, limited to the low nibble
mov al, UNRECOGNISED_ID_TYPE
and ax, 0xF ; mask as expected
mov eax, UNRECOGNISED_ID_TYPE
and eax, 0xF
ret
.found:
; index still selects the matching entry; its type byte sits 2 bytes in
mov al, [2 + tokens.by_id + rax * 4]
and ax, 0xF ; mask as expected
mov al, [2 + tokens.by_id + eax * 4]
and eax, 0xF ; mask as expected
ret
; ------------------------------------------------------------------------------
@@ -483,27 +459,26 @@ get_tte_type:
; ------------------------------------------------------------------------------
get_tte_typed_metadata:
; looks up the token table entry passed in di in tokens.by_id and returns the
; entry's metadata byte (the byte at +3 of the 4-byte entry) zero-extended in
; al/eax. if no ID matches, UNRECOGNISED_ID_METADATA is returned instead.
; clobbers cx; the input register is masked down to its low 16 bits.
;
; NOTE(review): this is a diff rendering. where two near-identical instructions
; appear back to back, the first appears to be the removed form and the second
; its 32-bit replacement; only one of each pair exists in either version.
and rdi, 0xFFFF ; mask input so it behaves as expected
xor eax, eax
and edi, 0xFFFF ; di = token table entry
xor eax, eax ; eax = tokens.by_id index
.loop:
cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range
cmp eax, (tokens.by_id_end - tokens.by_id) / 4 ; index range check
; NOTE(review): `jg` lets index == entry count through, so one entry beyond
; the computed count is compared before giving up (assuming tokens.by_id_end
; marks the end of the table); `jge`/`jae` would stop at the end - confirm.
jg .not_found
mov cx, [tokens.by_id + rax * 4] ; next entry in tokens.by_id
mov cx, [tokens.by_id + eax * 4] ; next entry in tokens.by_id
cmp cx, di
je .found
inc rax
inc eax
jmp .loop
.not_found:
; no ID matched: return the "unrecognised" metadata marker
xor eax, eax
mov al, UNRECOGNISED_ID_METADATA
mov eax, UNRECOGNISED_ID_METADATA
ret
.found:
; index still selects the matching entry; metadata is its fourth byte
mov al, [3 + tokens.by_id + rax * 4]
and rax, 0xFF
mov al, [3 + tokens.by_id + eax * 4]
and eax, 0xFF
ret
; ------------------------------------------------------------------------------
@@ -524,11 +499,14 @@ get_tte_typed_metadata:
get_ModRM:
push rbx
and dl, 11b ; mask for mod bits
shl dl, 6
and edi, 0xFFFF ; di = token table entry `reg`
and esi, 0xFFFF ; si = token table entry `R/M`
and edx, 11b ; dl = mod bits
shl edx, 6 ; and position
push rdi
shr di, 8
shr edi, 8
cmp dil, 0xFE
pop rdi
je .pass_di_as_op_flag
@@ -541,25 +519,25 @@ get_ModRM:
jmp .continue
.pass_di_as_op_flag:
mov bl, dil ; bl = op flag
and bl, 111b ; mask
mov ebx, edi ; bl = op flag
and ebx, 111b
.continue:
shl bl, 3
shl ebx, 3
mov rdi, rsi ; do the other one
mov edi, esi ; do the other one
; di = tte
call get_reg_bits
; al = reg bits
mov cl, al
mov ecx, eax
xor eax, eax
or al, dl ; mod bits
or al, bl ; reg bits
or al, cl ; R/M bits
and rax, 0xFF ; mask for byte
or eax, edx ; mod bits
or eax, ebx ; reg bits
or eax, ecx ; R/M bits
and eax, 0xFF ; mask for byte
pop rbx
ret
@@ -581,49 +559,45 @@ get_ModRM:
; ------------------------------------------------------------------------------
get_opcode:
; looks up the token table entry passed in di in opcodes.by_id and returns
;   al = opcode byte selected by the offset passed in sil/esi
;   dl = 4-bit op flag belonging to that opcode
; each opcodes.by_id entry is read as 16 bytes: a word ID at +0, opcode bytes
; starting at +2 (indexed by the offset), and packed flag nibbles starting at
; +8 (byte offset/2; low nibble for even offsets, high nibble for odd ones).
; if no ID matches, eax = UNRECOGNISED_ID_OPCODE and dl is left untouched.
; clobbers cx; di is masked to 16 bits and the offset register is normalised.
;
; NOTE(review): this is a diff rendering. where two near-identical instructions
; appear back to back, the first appears to be the removed form and the second
; its 32-bit replacement; only one of each pair exists in either version.
and rdi, 0xFFFF
and edi, 0xFFFF ; di = token table entry
add rsi, 2
and rsi, 111b
sub rsi, 2
add esi, 2
and esi, 111b ; offset within opcode entry
sub esi, 2 ; ((offset + 2) & 7) - 2, i.e. between -2 and 5
xor eax, eax
xor eax, eax ; eax = opcodes.by_id index
.loop:
cmp rax, (opcodes.by_id_end - opcodes.by_id) / 16 ; make sure it's still in range
cmp eax, (opcodes.by_id_end - opcodes.by_id) / 16 ; make sure it's still in range
; NOTE(review): `jg` lets index == entry count through, so one 16-byte entry
; beyond the computed count is compared before giving up (assuming
; opcodes.by_id_end marks the end of the table); `jge`/`jae` would stop at
; the end - confirm.
jg .not_found
; scale the index to a byte offset for the ID read, then restore it
shl rax, 4
mov cx, [opcodes.by_id + rax] ; next entry in opcodes.by_id
shr rax, 4
shl eax, 4
mov cx, [opcodes.by_id + eax] ; next entry in opcodes.by_id
shr eax, 4
cmp cx, di
je .found
inc rax
inc eax
jmp .loop
.not_found:
xor eax, eax
mov al, UNRECOGNISED_ID_OPCODE
mov eax, UNRECOGNISED_ID_OPCODE
ret
.found:
; from here on the index register holds the entry's byte offset (index * 16)
shl rax, 4
shl eax, 4
push rsi
; two flags share one byte, so the flag byte index is offset / 2
; NOTE(review): `shr` is a logical shift; for a normalised offset of -1 or -2
; it yields a large positive value and the flag read lands far outside the
; entry. presumably callers only pass offsets that stay non-negative - confirm.
shr rsi, 1
mov dl, [rsi + 8 + opcodes.by_id + rax]
shr esi, 1
mov dl, [esi + 8 + opcodes.by_id + eax]
pop rsi
push rsi
and rsi, 1
cmp esi, 1 ; check if offset is odd
pop rsi
jne .found_continue
shr dl, 4 ; if so, actually 1 further on dl byte
test esi, 1 ; check if offset is odd
jz .found_continue
shr edx, 4 ; if so, upper part of dl byte
.found_continue:
; opcode bytes start 2 bytes into the entry; flag is cut down to one nibble
mov al, [rsi + 2 + opcodes.by_id + rax]
and rax, 0xFF ; mask
and rdx, 0x0F ; mask
mov al, [esi + 2 + opcodes.by_id + eax]
and eax, 0xFF
and edx, 0x0F
ret
; ------------------------------------------------------------------------------
@@ -644,8 +618,8 @@ get_reg_bits:
; di = tte
call get_tte_typed_metadata
; al = typed metadata
shr al, 2 ; discard type data
and al, 111b ; mask
shr eax, 2 ; discard type data
and eax, 111b ; mask
ret
; ------------------------------------------------------------------------------
@@ -823,7 +797,7 @@ tokenise:
pop rdi ; rdi = byte counter
pop rax ; rax = tokens processed
mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax ; plus 1 token processed
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
@@ -891,15 +865,15 @@ tokenise:
; cx = token ID
.operand_register:
mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax ; another token processed
jmp .operand_break_continue
; cx = token ID
.operand_addr_register:
mov word [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], 0x1000
mov word [TOKEN_TABLE_ADDR + rax * 2], 0x1000
inc rax ; 0x1000: addr reg token, next token is the register
mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax ; the register as returned by evaluate_operand
jmp .operand_break_continue
@@ -983,11 +957,8 @@ tokenise:
; ------------------------------------------------------------------------------
evaluate_operand:
push rdi
push rsi
mov rsi, rdi ; rsi -> start of operand
pop rdi ; rdi = size of operand
push rdi ; rdi -> start of operand
; rsi = size of operand
call trim_trailing_whitespace
pop rdi ; rdi -> first byte of operand
@@ -1467,38 +1438,38 @@ djb2:
; trims whitespace from the start and end of the given byte array.
;
; parameters:
; rdi = size of list
; rsi -> start of list
; rdi -> start of list
; rsi = size of list
;
; returned:
; rax = new size of list
; ------------------------------------------------------------------------------
trim_trailing_whitespace:
; returns in rax the size the list has once trailing whitespace bytes are
; dropped. the last byte is tested against the 2-byte list whitespace_2 via
; elemb; while it matches, the size is reduced by one and the routine calls
; itself again, so every trimmed byte costs one nested call. nothing is written
; to the list itself; only the size changes.
;
; NOTE(review): this is a diff rendering in which the pointer and size
; arguments swap registers. where two near-identical instructions appear back
; to back, the first appears to be the removed form (rdi = size, rsi -> list)
; and the second its replacement (rdi -> list, rsi = size).
cmp rdi, 0 ; list of length zero
je .done ; already trimmed
test rsi, rsi ; list of length zero
jz .done ; already trimmed
; save both arguments: they are reused below to set up the call to elemb
push rdi
push rsi
push rdi
mov dl, [rsi + rdi - 1] ; last element of given list
mov dl, [rdi + rsi - 1] ; last element of given list
mov rsi, whitespace_2 ; pointer of whitespace list
mov rdi, 2 ; length of whitespace list
mov edi, 2 ; length of whitespace list
call elemb
pop rsi ; rsi -> start of list
pop rdi ; rdi = size of list
pop rdi ; rdi -> start of list
pop rsi ; rsi = size of list
; NOTE(review): the branch to .done is taken when elemb's result is zero.
; presumably elemb returns non-zero when dl is in the list, in which case zero
; means the last byte is NOT whitespace and the two comments below read
; inverted - confirm against elemb.
; NOTE(review): the replacement tests all of eax where the removed form tested
; only al; that is equivalent only if elemb zero-extends its result - confirm.
cmp al, 0 ; if last element whitespace
je .done ; then break
test eax, eax ; if last element whitespace
jz .done ; then break
.trim: ; otherwise one shorter
dec rdi
dec rsi
call trim_trailing_whitespace
ret
.done:
; the size register now holds the trimmed size
mov rax, rdi
mov rax, rsi
ret
; ------------------------------------------------------------------------------
@@ -1510,8 +1481,8 @@ trim_trailing_whitespace:
clear_token_table:
; zero-fills TOKEN_TABLE_SIZE bytes starting at TOKEN_TABLE_ADDR, one double
; word per `rep stosd` iteration. overwrites eax, rcx and rdi.
; assumes the direction flag is clear so that stosd walks upwards - TODO confirm.
;
; NOTE(review): this is a diff rendering. each rcx/rdi load appears to be the
; removed form and the ecx/edi load after it the replacement (a 32-bit write
; zero-extends into the full register).
xor eax, eax ; value to write
mov rcx, TOKEN_TABLE_SIZE / 4 ; number of double words
mov rdi, TOKEN_TABLE_ADDR ; address to start
mov ecx, TOKEN_TABLE_SIZE / 4 ; number of double words
mov edi, TOKEN_TABLE_ADDR ; address to start
rep stosd
ret
@@ -1524,8 +1495,8 @@ clear_token_table:
clear_test_arena:
; zero-fills TOKEN_TABLE_SIZE bytes starting at TOKEN_TABLE_ADDR, one double
; word per `rep stosd` iteration. overwrites eax, rcx and rdi.
; assumes the direction flag is clear so that stosd walks upwards - TODO confirm.
;
; NOTE(review): as written this clears the token table region, exactly like
; clear_token_table. the routine's name suggests the test arena was intended
; (a TEST_ARENA_SIZE constant exists); confirm which constants belong here.
;
; NOTE(review): this is a diff rendering. each rcx/rdi load appears to be the
; removed form and the ecx/edi load after it the replacement.
xor eax, eax ; value to write
mov rcx, TOKEN_TABLE_SIZE / 4 ; number of double words
mov rdi, TOKEN_TABLE_ADDR ; address to start
mov ecx, TOKEN_TABLE_SIZE / 4 ; number of double words
mov edi, TOKEN_TABLE_ADDR ; address to start
rep stosd
ret
@@ -1538,8 +1509,8 @@ clear_test_arena:
clear_output_arena:
; zero-fills OUTPUT_SIZE bytes starting at OUTPUT_ADDR, one double word per
; `rep stosd` iteration. overwrites eax, rcx and rdi, so callers that need
; those values across the call must save them themselves.
; assumes the direction flag is clear so that stosd walks upwards - TODO confirm.
;
; NOTE(review): this is a diff rendering. each rcx/rdi load appears to be the
; removed form and the ecx/edi load after it the replacement.
xor eax, eax ; value to write
mov rcx, OUTPUT_SIZE / 4 ; number of double words
mov rdi, OUTPUT_ADDR ; address to start
mov ecx, OUTPUT_SIZE / 4 ; number of double words
mov edi, OUTPUT_ADDR ; address to start
rep stosd
ret

View File

@@ -654,7 +654,7 @@ test_evaluate_operand:
msg_pass:
; NUL-terminated message: a newline, space padding, the escape sequence
; ESC "[32m", the text "passed.", the escape sequence ESC "[0m", and a newline.
; the padding length is derived from the text length (.end - .start), so the
; text ends at a fixed column set by TEST_LINE_LENGTH.
;
; NOTE(review): this is a diff rendering. the two `times` lines appear to be
; the removed and the replacement form of the same line; the replacement pads
; with 5 fewer spaces.
db 0x0A
times (TEST_LINE_LENGTH + .start + 5 - .end) db " ", ; right align
times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align
db 0x1B, "[32m"
.start db "passed."
.end db 0x1B, "[0m", 0x0A, 0x00