correctly handle [register]s as operands

This commit is contained in:
andromeda
2026-03-21 21:42:50 +01:00
parent 73ea2bb2b5
commit 55c426631b
2 changed files with 31 additions and 18 deletions

View File

@@ -306,8 +306,8 @@ supported tokens are listed below
| cr8 | 0x004E | | | cr8 | 0x004E | |
| hlt | 0x004F | | | hlt | 0x004F | |
| int3 | 0x0050 | | | int3 | 0x0050 | |
| [ | 0x0051 | open bracket placeholder; 0x10XX should be used in contexts where the surrounding tokens can be known | | | 0x0051 | deprecated; formerly `[`. Now `0x10XX` is used. |
| ] | 0x0052 | | | | 0x0052 | deprecated; formerly `]`. |
| xor | 0x0053 | | | xor | 0x0053 | |
| inc | 0x0054 | | | inc | 0x0054 | |
| dec | 0x0055 | | | dec | 0x0055 | |
@@ -323,14 +323,15 @@ supported tokens are listed below
| jg | 0x005F | | | jg | 0x005F | |
| jle | 0x0060 | | | jle | 0x0060 | |
| jl | 0x0061 | | | jl | 0x0061 | |
| + | 0x0062 | | | | 0x10XX | some memory address; `XX` is as specified below |
| - | 0x0063 | |
| * | 0x0064 | |
| / | 0x0065 | |
| [ | 0x10XX | open bracket with `XX` bytes until the closing bracket |
| | 0xFEXX | token terminator byte as token, where `XX` is the byte |
| | 0xFFFF | unrecognised token | | | 0xFFFF | unrecognised token |
values of `XX` in `0x10XX`:
| XX | description |
|------|-------------|
| 0x00 | the following token is the token ID of some register |
### example program ### example program
#### program in assembly #### program in assembly

View File

@@ -470,7 +470,7 @@ tokenise:
.loop: .loop:
cmp rdi, rsi ; if current byte greater than last byte cmp rdi, rsi ; if current byte greater than last byte
jg .break ; then break jge .break ; then break
mov dl, [rdi] ; dl = current byte mov dl, [rdi] ; dl = current byte
@@ -488,8 +488,8 @@ tokenise:
push rax push rax
push rdx push rdx
; TODO probably should not ignore null bytes ; TODO probably should not ignore null bytes
mov rsi, whitespace_3 ; rsi -> list of whitespace (ignored) bytes mov rsi, whitespace_2 ; rsi -> list of whitespace (ignored) bytes
mov rdi, 3 ; rdi = size of list in bytes mov rdi, 2 ; rdi = size of list in bytes
; dl = current byte ; dl = current byte
call elemb call elemb
; al = 0 if not whitespace, 1 if whitespace ; al = 0 if not whitespace, 1 if whitespace
@@ -671,18 +671,32 @@ tokenise:
mov rcx, rax ; rcx = evaluate_operand's return value mov rcx, rax ; rcx = evaluate_operand's return value
pop rax ; rax = number of tokens processed pop rax ; rax = number of tokens processed
; operand is a register ; operand is some reg
; cx = token ID ; cx = token ID
cmp dl, 0x00 cmp dl, 0x00
je .operand_register je .operand_register
; operand is some [reg]
; cx = token ID
cmp dl, 0x10
je .operand_addr_register
jmp .unexpected_operand jmp .unexpected_operand
; cx = token ID
.operand_register: .operand_register:
mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx
inc rax ; another token processed inc rax ; another token processed
jmp .operand_break_continue jmp .operand_break_continue
; cx = token ID
; operand was written as [reg]: emit two token table entries
; in:  rax = number of tokens processed (index of the next free entry)
;      cx  = token ID as returned by evaluate_operand
; out: entry[rax]     = 0x1000 (addr reg token, next token is the register)
;      entry[rax + 1] = cx
;      rax advanced by 2
.operand_addr_register:
mov word [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], 0x1000
inc rax ; 0x1000: addr reg token, next token is the register
mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx
inc rax ; the register as returned by evaluate_operand
jmp .operand_break_continue
.operand_break_continue: .operand_break_continue:
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
jmp .loop jmp .loop
@@ -833,8 +847,6 @@ evaluate_operand:
mov dl, 0xFF mov dl, 0xFF
ret ret
.msg db "evaluate_operand", 0x0A, 0x00
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; evaluate_constant ; evaluate_constant
; ;
@@ -1232,8 +1244,8 @@ trim_trailing_whitespace:
push rsi push rsi
mov dl, [rsi + rdi - 1] ; last element of given list mov dl, [rsi + rdi - 1] ; last element of given list
mov rsi, whitespace_3 ; pointer of whitespace list mov rsi, whitespace_2 ; pointer of whitespace list
mov rdi, 3 ; length of whitespace list mov rdi, 2 ; length of whitespace list
call elemb call elemb
pop rsi ; rsi -> start of list pop rsi ; rsi -> start of list
@@ -1553,7 +1565,7 @@ msg_halt db "halted.", 0x0A, 0x00
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00 token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
whitespace_3 db " ", 0x0D, 0x00 whitespace_2 db " ", 0x0D
; test program ; test program
program: program:
@@ -1561,5 +1573,5 @@ program:
db "inc rax ; inline comment", 0x0A db "inc rax ; inline comment", 0x0A
db "; one line comment", 0x0A db "; one line comment", 0x0A
db "mov [rax], rdx", 0x0A db "mov [rax], rdx", 0x0A
db "hlt", 0x00 ; for the sake of being able to print it, I made it a string db "hlt"
.size db $ - program .size db $ - program