From 55c426631bab8e356d0246bb140deeb0b6fcfb89 Mon Sep 17 00:00:00 2001 From: andromeda Date: Sat, 21 Mar 2026 21:42:50 +0100 Subject: [PATCH] correctly handle [register]s as operands --- twasm/README.md | 17 +++++++++-------- twasm/asm/main.asm | 32 ++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/twasm/README.md b/twasm/README.md index c2cc263..3f63fe7 100644 --- a/twasm/README.md +++ b/twasm/README.md @@ -306,8 +306,8 @@ supported tokens are listed below | cr8 | 0x004E | | | hlt | 0x004F | | | int3 | 0x0050 | | -| [ | 0x0051 | open bracket placeholder; 0x10XX should be used in contexts where the surrounding tokens can be known | -| ] | 0x0052 | | +| | 0x0051 | deprecated; formerly `[`. Now `0x10XX` is used. | +| | 0x0052 | deprecated; formerly `]`. | | xor | 0x0053 | | | inc | 0x0054 | | | dec | 0x0055 | | @@ -323,14 +323,15 @@ supported tokens are listed below | jg | 0x005F | | | jle | 0x0060 | | | jl | 0x0061 | | -| + | 0x0062 | | -| - | 0x0063 | | -| * | 0x0064 | | -| / | 0x0065 | | -| [ | 0x10XX | open bracket with `XX` bytes until the closing bracket | -| | 0xFEXX | token terminator byte as token, where `XX` is the byte | +| | 0x10XX | some memory address; `XX` is as specified below | | | 0xFFFF | unrecognised token | +values of `XX` in `0x10XX`: + +| XX | description | +|------|-------------| +| 0x00 | following token is the token ID of some register | + ### example program #### program in assembly diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index b0e7cad..a6885be 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -470,7 +470,7 @@ tokenise: .loop: cmp rdi, rsi ; if current byte greater than last byte - jg .break ; then break + jge .break ; then break mov dl, [rdi] ; dl = current byte @@ -488,8 +488,8 @@ tokenise: push rax push rdx ; TODO probably should not ignore null bytes - mov rsi, whitespace_3 ; rsi -> list of whitespace (ignored) bytes - mov rdi, 3 ; rdi = size of list in bytes + mov
rsi, whitespace_2 ; rsi -> list of whitespace (ignored) bytes + mov rdi, 2 ; rdi = size of list in bytes ; dl = current byte call elemb ; al = 0 if not whitespace, 1 if whitespace @@ -671,18 +671,32 @@ tokenise: mov rcx, rax ; rcx = evaluate_operand's return value pop rax ; rax = number of tokens processed - ; operand is a register + ; operand is some reg ; cx = token ID cmp dl, 0x00 je .operand_register + ; operand is some [reg] + ; cx = token ID + cmp dl, 0x10 + je .operand_addr_register + jmp .unexpected_operand + ; cx = token ID .operand_register: mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx inc rax ; another token processed jmp .operand_break_continue + ; cx = token ID of the register inside the brackets + .operand_addr_register: + mov word [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], 0x1000 + inc rax ; 0x1000: addr reg token, next token is the register + mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx + inc rax ; the register as returned by evaluate_operand + jmp .operand_break_continue + .operand_break_continue: mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA jmp .loop @@ -833,8 +847,6 @@ evaluate_operand: mov dl, 0xFF ret - .msg db "evaluate_operand", 0x0A, 0x00 - ; ------------------------------------------------------------------------------ ; evaluate_constant ; @@ -1232,8 +1244,8 @@ trim_trailing_whitespace: push rsi mov dl, [rsi + rdi - 1] ; last element of given list - mov rsi, whitespace_3 ; pointer of whitespace list - mov rdi, 3 ; length of whitespace list + mov rsi, whitespace_2 ; pointer of whitespace list + mov rdi, 2 ; length of whitespace list call elemb pop rsi ; rsi -> start of list @@ -1553,7 +1565,7 @@ msg_halt db "halted.", 0x0A, 0x00 token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00 -whitespace_3 db " ", 0x0D, 0x00 +whitespace_2 db " ", 0x0D ; test program program: @@ -1561,5 +1573,5 @@ program: db "inc rax ; inline comment", 0x0A db "; one line comment", 0x0A db "mov [rax], rdx", 0x0A - db "hlt", 0x00 ;
for the sake of being able to print it, I made it a string + db "hlt" .size db $ - program