fix some bugs, work on assembler

This commit is contained in:
andromeda
2026-03-09 11:00:59 +01:00
parent 33710a8ebe
commit 869420ef7a
2 changed files with 69 additions and 13 deletions

View File

@@ -37,8 +37,11 @@ start:
mov rdi, program ; -> program mov rdi, program ; -> program
mov rsi, [program.size] ; = size of program mov rsi, [program.size] ; = size of program
call tokenise call tokenise
; rax = number of tokens processed
mov rdi, rax
push rdi
call clear_output_arena call clear_output_arena
pop rdi
call assemble call assemble
jmp halt jmp halt
@@ -49,30 +52,63 @@ start:
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; assemble ; assemble
; TODO write tests ; TODO write tests
; TODO make it work :/ putting the cart before the horse ; TODO make it work :/ putting the cart before the horse
; ;
; description: ; description:
; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat ; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
; binary located at OUTPUT_ADDR. It's probably desirable to clear the output ; binary located at OUTPUT_ADDR. It's probably desirable to clear the output
; arena before calling this function. ; arena before calling this function.
;
; parameters:
; rdi = number of tokens in the token table
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
assemble:
; Walks the token table one entry at a time and, for now, only reports the
; type of each token that is an operator or a register.
;
; in:       rdi = number of tokens in the token table
; out:      rax = number of tokens examined (equals the incoming rdi)
; preserves rdi and rsi across the loop via the stack
; calls:    get_tte_type (di = token table entry -> al = token type)
;           print (defined outside this view; invoked with rsi -> message,
;           exactly as before - NOTE(review): confirm its clobber list)
        xor     eax, eax                ; rax = index of the token being examined

.loop:
        cmp     rax, rdi                ; every token examined?
        jae     .break                  ; unsigned compare: both values are counts

        push    rdi                     ; save the total number of tokens
        push    rax                     ; save the token index
        movzx   edi, word [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR]
                                        ; edi = next token table entry, zero-extended
        call    get_tte_type            ; al = type of the token, rest of rax zeroed

        ; Dispatch on the type before calling anything else, so the result
        ; does not have to survive a call to print.
        cmp     al, 0x01                ; operator?
        je      .operator
        cmp     al, 0x02                ; register?
        je      .register
        jmp     .next                   ; any other type: nothing to report yet

.operator:
        push    rsi
        mov     rsi, .msg_found_operator
        call    print
        pop     rsi
        jmp     .next

.register:
        push    rsi
        mov     rsi, .msg_found_register
        call    print
        pop     rsi

.next:
        pop     rax                     ; token index
        pop     rdi                     ; total number of tokens
        inc     rax                     ; move to the next token
        jmp     .loop

.break:
        ret

.msg_found_operator db "found operator", 0x0A, 0x00
.msg_found_register db "found register", 0x0A, 0x00
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; get_tte_type ; get_tte_type
@@ -85,12 +121,13 @@ assemble:
; di = token table entry ; di = token table entry
; ;
; returned: ; returned:
; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bytes of al are ; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
; zeroed; the rest of rax is zeroed. ; zeroed; the rest of rax is zeroed.
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
get_tte_type: get_tte_type:
and rdi, 0xFFFF ; mask input so it behaves as expected and rdi, 0xFFFF ; mask input so it behaves as expected
xor eax, eax
.loop: .loop:
cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range
@@ -104,11 +141,12 @@ get_tte_type:
inc rax inc rax
jmp .loop jmp .loop
.not_found: .not_found:
xor rax, rax
mov al, UNRECOGNISED_ID_TYPE mov al, UNRECOGNISED_ID_TYPE
and ax, 0xF ; mask as expected
ret ret
.found: .found:
mov al, [2 + tokens.by_id + rax * 4] mov al, [2 + tokens.by_id + rax * 4]
and ax, 0xF ; mask as expected
ret ret
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
@@ -128,6 +166,7 @@ get_tte_type:
get_tte_typed_metadata: get_tte_typed_metadata:
and rdi, 0xFFFF ; mask input so it behaves as expected and rdi, 0xFFFF ; mask input so it behaves as expected
xor eax, eax
.loop: .loop:
cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range
@@ -141,7 +180,7 @@ get_tte_typed_metadata:
inc rax inc rax
jmp .loop jmp .loop
.not_found: .not_found:
xor rax, rax xor eax, eax
mov al, UNRECOGNISED_ID_METADATA mov al, UNRECOGNISED_ID_METADATA
ret ret
.found: .found:
@@ -163,11 +202,14 @@ get_tte_typed_metadata:
; parameters: ; parameters:
; rdi -> first byte of program ; rdi -> first byte of program
; rsi = size of program in bytes ; rsi = size of program in bytes
;
; returned:
; rax = number of tokens processed
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
tokenise: tokenise:
add rsi, rdi ; last byte of program add rsi, rdi ; last byte of program
xor rcx, rcx ; number of tokens processed xor ecx, ecx ; number of tokens processed
.loop: .loop:
cmp rdi, rsi ; if current byte greater than last byte cmp rdi, rsi ; if current byte greater than last byte
jg .break ; then break jg .break ; then break
@@ -218,16 +260,20 @@ tokenise:
.close_bracket: .close_bracket:
; rewrite open bracket token entry with a filled out one ; rewrite open bracket token entry with a filled out one
push rcx
mov dl, [.data_open_bracket] mov dl, [.data_open_bracket]
sub cl, dl sub cl, dl
mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl
mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10 mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10
add cl, dl
pop rcx
.continue_close_bracket: .continue_close_bracket:
inc rcx ; +1 token processed inc rcx ; +1 token processed
jmp .loop jmp .loop
.break: .break:
mov rax, rcx
ret ret
.data_open_bracket db 0x00 ; represents the token # of the latest open bracket .data_open_bracket db 0x00 ; represents the token # of the latest open bracket

View File

@@ -403,6 +403,11 @@ test_get_tte_type:
cmp al, 0x02 ; register cmp al, 0x02 ; register
jne .fail jne .fail
mov di, 0x0056 ; mov
call get_tte_type
cmp al, 0x01 ; operator
jne .fail
mov di, 0xFFFF ; unrecognised token mov di, 0xFFFF ; unrecognised token
call get_tte_type call get_tte_type
cmp al, UNRECOGNISED_ID_TYPE cmp al, UNRECOGNISED_ID_TYPE
@@ -439,6 +444,11 @@ test_get_tte_typed_metadata:
cmp al, 0x03 ; width: 64 bits cmp al, 0x03 ; width: 64 bits
jne .fail jne .fail
mov di, 0x0056 ; mov
call get_tte_typed_metadata
cmp al, 0x02 ; # operands
jne .fail
mov di, 0xFFFF ; unrecognised token mov di, 0xFFFF ; unrecognised token
call get_tte_typed_metadata call get_tte_typed_metadata
cmp al, UNRECOGNISED_ID_METADATA cmp al, UNRECOGNISED_ID_METADATA