fix some bugs, work on assembler

This commit is contained in:
andromeda
2026-03-09 11:00:59 +01:00
parent 33710a8ebe
commit 869420ef7a
2 changed files with 69 additions and 13 deletions

View File

@@ -37,8 +37,11 @@ start:
mov rdi, program ; -> program
mov rsi, [program.size] ; = size of program
call tokenise
; rax = number of tokens processed
mov rdi, rax
push rdi
call clear_output_arena
pop rdi
call assemble
jmp halt
@@ -49,30 +52,63 @@ start:
; ------------------------------------------------------------------------------
; assemble
; TODO write tests
; TODO make it work :/ putting the cart before the horse
;
; description:
; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
; binary located at OUTPUT_ADDR. It's probably desirable to clear the output
; arena before calling this function.
;
; For now this only walks the token table and prints a message for every
; operator or register token it meets; nothing is written to OUTPUT_ADDR yet.
;
; parameters:
; rdi = number of tokens in the token table
;
; returned:
; rax = number of tokens visited
;
; notes:
; rdi and rsi are saved and restored around the calls made here. Any other
; register that get_tte_type or print clobbers is not preserved.
; ------------------------------------------------------------------------------
assemble:
	xor eax, eax		; rax = index of the token being examined
.loop:
	; Counts are unsigned, so use the unsigned condition codes. The first
	; check keeps the index inside the table even if rdi is larger than the
	; table can hold; the second stops once every token has been visited.
	cmp rax, TOKEN_TABLE_SIZE / TOKEN_TABLE_ENTRY_SIZE
	jae .break
	cmp rax, rdi
	jae .break

	push rdi		; token count: rdi is reused as the argument below
	push rax		; token index: get_tte_type returns its result in rax

	; rdi = current token table entry, zero-extended from 16 bits
	movzx edi, word [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR]
	call get_tte_type	; al = type of the token

	; Dispatch on the type straight away. A token has exactly one type, so
	; each handler jumps to .next instead of re-testing al after print, which
	; is not known to preserve rax.
	cmp al, 0x01		; operator?
	je .operator
	cmp al, 0x02		; register?
	je .register
	jmp .next		; any other type is ignored for now

.operator:
	push rsi
	mov rsi, .msg_found_operator
	call print
	pop rsi
	jmp .next

.register:
	push rsi
	mov rsi, .msg_found_register
	call print
	pop rsi

.next:
	pop rax			; token index
	pop rdi			; total number of tokens
	inc rax			; move to the next token
	jmp .loop

.break:
	ret

.msg_found_operator db "found operator", 0x0A, 0x00
.msg_found_register db "found register", 0x0A, 0x00
; ------------------------------------------------------------------------------
; get_tte_type
@@ -85,12 +121,13 @@ assemble:
; di = token table entry
;
; returned:
; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bytes of al are
; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
; zeroed; the rest of rax is zeroed.
; ------------------------------------------------------------------------------
get_tte_type:
and rdi, 0xFFFF ; mask input so it behaves as expected
xor eax, eax
.loop:
cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range
@@ -104,11 +141,12 @@ get_tte_type:
inc rax
jmp .loop
.not_found:
xor rax, rax
mov al, UNRECOGNISED_ID_TYPE
and ax, 0xF ; mask as expected
ret
.found:
mov al, [2 + tokens.by_id + rax * 4]
and ax, 0xF ; mask as expected
ret
; ------------------------------------------------------------------------------
@@ -128,6 +166,7 @@ get_tte_type:
get_tte_typed_metadata:
and rdi, 0xFFFF ; mask input so it behaves as expected
xor eax, eax
.loop:
cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range
@@ -141,7 +180,7 @@ get_tte_typed_metadata:
inc rax
jmp .loop
.not_found:
xor rax, rax
xor eax, eax
mov al, UNRECOGNISED_ID_METADATA
ret
.found:
@@ -163,11 +202,14 @@ get_tte_typed_metadata:
; parameters:
; rdi -> first byte of program
; rsi = size of program in bytes
;
; returned:
; rax = number of tokens processed
; ------------------------------------------------------------------------------
tokenise:
add rsi, rdi ; last byte of program
xor rcx, rcx ; number of tokens processed
xor ecx, ecx ; number of tokens processed
.loop:
cmp rdi, rsi ; if current byte greater than last byte
jg .break ; then break
@@ -218,16 +260,20 @@ tokenise:
.close_bracket:
; rewrite open bracket token entry with a filled out one
push rcx
mov dl, [.data_open_bracket]
sub cl, dl
mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl
mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10
add cl, dl
pop rcx
.continue_close_bracket:
inc rcx ; +1 token processed
jmp .loop
.break:
mov rax, rcx
ret
.data_open_bracket db 0x00 ; represents the token # of the latest open bracket

View File

@@ -403,6 +403,11 @@ test_get_tte_type:
cmp al, 0x02 ; register
jne .fail
mov di, 0x0056 ; mov
call get_tte_type
cmp al, 0x01 ; operator
jne .fail
mov di, 0xFFFF ; unrecognised token
call get_tte_type
cmp al, UNRECOGNISED_ID_TYPE
@@ -439,6 +444,11 @@ test_get_tte_typed_metadata:
cmp al, 0x03 ; width: 64 bits
jne .fail
mov di, 0x0056 ; mov
call get_tte_typed_metadata
cmp al, 0x02 ; # operands
jne .fail
mov di, 0xFFFF ; unrecognised token
call get_tte_typed_metadata
cmp al, UNRECOGNISED_ID_METADATA