From 869420ef7ab2bfa8bc04c44584e55bb337d83f32 Mon Sep 17 00:00:00 2001
From: andromeda
Date: Mon, 9 Mar 2026 11:00:59 +0100
Subject: [PATCH] fix some bugs, work on assembler

---
Review notes (this section is ignored by git am):
- assemble: local labels now end in ':'; each redundant je/jne pair is
  reduced to a single jne; the operator path jumps straight to
  .continue_register so the token type in ax is never re-read after print.
- assemble: the loop bound check uses jae because the token count is unsigned.
- The "TODO write testsr" typo is dropped, so that comment line is unchanged.
- Line breaks and indentation of this patch were reconstructed; check the
  context whitespace against the tree, or apply with --ignore-whitespace.

 twasm/asm/main.asm  | 70 ++++++++++++++++++++++++++++++++++++++--------
 twasm/asm/tests.asm | 10 +++++++
 2 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm
index 7128a7f..57af625 100644
--- a/twasm/asm/main.asm
+++ b/twasm/asm/main.asm
@@ -37,8 +37,11 @@ start:
 	mov rdi, program		; -> program
 	mov rsi, [program.size]		; = size of program
 	call tokenise
-
+	; rax = number of tokens processed
+	mov rdi, rax
+	push rdi
 	call clear_output_arena
+	pop rdi
 	call assemble
 
 	jmp halt
@@ -49,30 +52,63 @@ start:
 
 ; ------------------------------------------------------------------------------
 ; assemble
 ; TODO write tests
 ; TODO make it work :/ putting the cart before the horse
 ;
 ; description:
 ; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
 ; binary located at OUTPUT_ADDR. It's probably desirable to clear the output
 ; arena before calling this function.
+;
+; parameters:
+; rdi = number of tokens in the token table
 ; ------------------------------------------------------------------------------
 
 assemble:
 	xor rax, rax		; number of tokens processed
 	.loop:
-		cmp rax, TOKEN_TABLE_SIZE / TOKEN_TABLE_ENTRY_SIZE	; check incrementer
-									; against the number of
-									; entries in the token
-									; table
-		jg .break		; if overflown, break
+		cmp rax, rdi	; check incrementer against the number of tokens in the token
+		jae .break	; table (unsigned count). If overflown, break
 
-		mov rdi, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR]	; rdi -> next
+		push rdi	; save total number of tokens
+		xor edi, edi
+		mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR]	; rdi = next tte
+		push rax	; save incrementer
+		xor eax, eax
+		call get_tte_type
+
+		cmp ax, 0x01	; check if it's an operator
+		jne .continue_operator
+
+		.operator:
+		push rsi
+		mov rsi, .msg_found_operator
+		call print
+		pop rsi
+		jmp .continue_register	; type 0x01 is not 0x02; avoids reading ax after print
+
+		.continue_operator:
+		cmp ax, 0x02	; check if it's a register
+		jne .continue_register
+
+		.register:
+		push rsi
+		mov rsi, .msg_found_register
+		call print
+		pop rsi
+		; falls through into .continue_register
+
+		.continue_register:
+		pop rax		; incrementer
+		pop rdi		; total number of tokens
 
 		inc rax		; move to next token
 		jmp .loop
+
 	.break:
 	ret
+	.msg_found_operator db "found operator", 0x0A, 0x00
+	.msg_found_register db "found register", 0x0A, 0x00
 
 ; ------------------------------------------------------------------------------
 ; get_tte_type
@@ -85,12 +121,13 @@ assemble:
 ; di = token table entry
 ;
 ; returned:
-; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bytes of al are
+; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
 ; zeroed; the rest of rax is zeroed.
 ; ------------------------------------------------------------------------------
 
 get_tte_type:
 	and rdi, 0xFFFF		; mask input so it behaves as expected
+	xor eax, eax
 	.loop:
 		cmp rax, (tokens.by_id_end - tokens.by_id) / 4	; make sure it's still
 								; in range
@@ -104,11 +141,12 @@ get_tte_type:
 		inc rax
 		jmp .loop
 	.not_found:
-	xor rax, rax
 	mov al, UNRECOGNISED_ID_TYPE
+	and ax, 0xF		; mask as expected
 	ret
 	.found:
 	mov al, [2 + tokens.by_id + rax * 4]
+	and ax, 0xF		; mask as expected
 	ret
 
 ; ------------------------------------------------------------------------------
@@ -128,6 +166,7 @@ get_tte_type:
 
 get_tte_typed_metadata:
 	and rdi, 0xFFFF		; mask input so it behaves as expected
+	xor eax, eax
 	.loop:
 		cmp rax, (tokens.by_id_end - tokens.by_id) / 4	; make sure it's still
 								; in range
@@ -141,7 +180,7 @@ get_tte_typed_metadata:
 		inc rax
 		jmp .loop
 	.not_found:
-	xor rax, rax
+	xor eax, eax
 	mov al, UNRECOGNISED_ID_METADATA
 	ret
 	.found:
@@ -163,11 +202,14 @@ get_tte_typed_metadata:
 ; parameters:
 ; rdi -> first byte of program
 ; rsi = size of program in bytes
+;
+; returned:
+; rax = number of tokens processed
 ; ------------------------------------------------------------------------------
 
 tokenise:
 	add rsi, rdi		; last byte of program
-	xor rcx, rcx		; number of tokens processed
+	xor ecx, ecx		; number of tokens processed
 	.loop:
 		cmp rdi, rsi	; if current byte greater than last byte
 		jg .break	; then break
@@ -218,16 +260,20 @@ tokenise:
 
 		.close_bracket:
 		; rewrite open bracket token entry with a filled out one
+		push rcx
+
 		mov dl, [.data_open_bracket]
 		sub cl, dl
 		mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl
 		mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10
-		add cl, dl
+
+		pop rcx
 		.continue_close_bracket:
 		inc rcx		; +1 token processed
 		jmp .loop
 
 	.break:
+	mov rax, rcx
 	ret
 	.data_open_bracket db 0x00	; represents the token # of the latest open bracket
 
diff --git a/twasm/asm/tests.asm b/twasm/asm/tests.asm
index d5341f0..f48d8bf 100644
--- a/twasm/asm/tests.asm
+++ b/twasm/asm/tests.asm
@@ -403,6 +403,11 @@ test_get_tte_type:
 	cmp al, 0x02		; register
 	jne .fail
 
+	mov di, 0x0056		; mov
+	call get_tte_type
+	cmp al, 0x01		; operator
+	jne .fail
+
 	mov di, 0xFFFF		; unrecognised token
 	call get_tte_type
 	cmp al, UNRECOGNISED_ID_TYPE
@@ -439,6 +444,11 @@ test_get_tte_typed_metadata:
 	cmp al, 0x03		; width: 64 bits
 	jne .fail
 
+	mov di, 0x0056		; mov
+	call get_tte_typed_metadata
+	cmp al, 0x02		; # operands
+	jne .fail
+
 	mov di, 0xFFFF		; unrecognised token
 	call get_tte_typed_metadata
 	cmp al, UNRECOGNISED_ID_METADATA