add get_opcode and its test, fix a couple bugs + semantics things

This commit is contained in:
andromeda
2026-03-10 16:32:06 +01:00
parent a972f38bb6
commit 08c39a2bd8
3 changed files with 142 additions and 5 deletions

View File

@@ -144,6 +144,20 @@ type metadata for the different types is as follows:
11b ; 64 bit
```
#### `opcodes.by_id`
entries are as follows:
```
+----------+--------+----------+
| 31 24 | 23 16 | 15 0 |
+----------+--------+----------+
| reserved | opcode | token ID |
+----------+--------+----------+
```
note: there is currently no support for multi-byte opcodes, nor for multiple opcodes mapping to one token ID; these features will likely be added at some point after the parser accumulates too much jank.
### token IDs
supported tokens are listed below

View File

@@ -18,6 +18,7 @@ STACK_ADDR equ 0x00060000 ; address to put the 64-bit stack at
UNRECOGNISED_TOKEN_ID equ 0xFFFF ; id of an unrecognised token
UNRECOGNISED_ID_TYPE equ 0x0F ; type of an unrecognised id
UNRECOGNISED_ID_METADATA equ 0xFF ; metadata of an unrecognised id
UNRECOGNISED_ID_OPCODE equ 0x90 ; opcode of an unrecognised id (NOP)
TEST_LINE_LENGTH equ 80 ; right border of test suite results
@@ -81,24 +82,43 @@ assemble:
je .operator
jne .continue_operator
.operator
.operator:
push rsi
mov rsi, .msg_found_operator
call print
pop rsi
.continue_operator
push rdi
; di = tte
call get_tte_typed_metadata
; al = tte typed metadata
pop rdi
and al, 11b ; mask for # operands
cmp al, 0 ; 0 operands
je .operator_0
jne .continue_operator0
.operator_0:
jmp .continue_operator
.continue_operator0:
.continue_operator:
cmp ax, 0x02 ; check if it's a register
je .register
jne .continue_register
.register
.register:
push rsi
mov rsi, .msg_found_register
call print
pop rsi
.continue_register
.continue_register:
pop rax ; incrementer
pop rdi ; total number of tokens
@@ -109,6 +129,8 @@ assemble:
ret
.msg_found_operator db "found operator", 0x0A, 0x00
.msg_found_register db "found register", 0x0A, 0x00
.next_register dw UNRECOGNISED_TOKEN_ID
.next_next_register dw UNRECOGNISED_TOKEN_ID
; ------------------------------------------------------------------------------
; get_tte_type
@@ -185,6 +207,7 @@ get_tte_typed_metadata:
ret
.found:
mov al, [3 + tokens.by_id + rax * 4]
and rax, 0xFF
ret
; ------------------------------------------------------------------------------
@@ -199,7 +222,7 @@ get_tte_typed_metadata:
; si = token table entry `R/M`
;
; returned:
; al = ModR/M byte
; al = ModR/M byte; the rest of rax is zeroed
; ------------------------------------------------------------------------------
get_direct_addressing_ModRM:
@@ -244,6 +267,43 @@ get_direct_addressing_ModRM:
and rax, 0xFF ; mask for byte
ret
; ------------------------------------------------------------------------------
; get_opcode
;
; description:
; given an operator token, returns its opcode by linearly searching
; `opcodes.by_id`; each entry there is 4 bytes: token ID (word), opcode (byte),
; reserved (byte)
;
; parameters:
; di = token table entry
;
; returned:
; al = opcode, or UNRECOGNISED_ID_OPCODE if the token has no entry in
;      `opcodes.by_id`; the rest of rax is zeroed
;
; clobbered:
; rdi (bits above di are zeroed), cx, flags
; ------------------------------------------------------------------------------
get_opcode:
	and rdi, 0xFFFF
	xor eax, eax					; rax = index of the entry being examined
.loop:
	cmp rax, (opcodes.by_id_end - opcodes.by_id) / 4 ; make sure it's still in range
	jae .not_found					; index == entry count is already one past the
							; last entry; unsigned compare as it's an index
	mov cx, [opcodes.by_id + rax * 4]		; token ID of the current entry
	cmp cx, di
	je .found
	inc rax
	jmp .loop
.not_found:
	mov eax, UNRECOGNISED_ID_OPCODE			; 32-bit write zeroes the rest of rax
	ret
.found:
	movzx eax, byte [2 + opcodes.by_id + rax * 4]	; opcode byte follows the token ID word;
							; zero-extends into the whole of rax
	ret
; ------------------------------------------------------------------------------
; tokenising
; ------------------------------------------------------------------------------
@@ -961,6 +1021,25 @@ tokens:
db 0x00 ; # operands
.by_id_end:
opcodes:
.by_id:
	; one 4-byte entry per operator:
	;   word: token ID
	;   byte: opcode
	;   byte: reserved (0x00)
	dw 0x0053		; xor
	db 0x33, 0x00		; opcode, reserved

	dw 0x0054		; inc
	db 0xFF, 0x00		; opcode, reserved

	dw 0x0056		; mov
	db 0x8B, 0x00		; opcode, reserved

	dw 0x004F		; hlt
	db 0xF4, 0x00		; opcode, reserved
.by_id_end:
msg_welcome db "Welcome to Twasm", 0x0A, 0x00
msg_halt db "halted.", 0x0A, 0x00

View File

@@ -37,6 +37,9 @@ run_tests:
call clear_test_arena
call test_get_direct_addressing_ModRM
call clear_test_arena
call test_get_opcode
ret
.msg db "running test suite...", 0x0A, 0x00
@@ -513,6 +516,47 @@ test_get_direct_addressing_ModRM:
ret
.msg db "test_get_direct_addressing_ModRM...", 0x00
; ------------------------------------------------------------------------------
; test_get_opcode
;
; description:
; tests that get_opcode behaves as described: each operator token in
; `opcodes.by_id` yields its opcode, a token with no entry yields
; UNRECOGNISED_ID_OPCODE, and in every case the rest of rax is zeroed (hence
; the comparisons against the whole of rax rather than just al)
; ------------------------------------------------------------------------------
test_get_opcode:
	mov rsi, .msg
	call print

	mov di, 0x0053			; xor
	call get_opcode
	cmp rax, 0x33
	jne .fail

	mov di, 0x0054			; inc
	call get_opcode
	cmp rax, 0xFF
	jne .fail

	mov di, 0x0056			; mov
	call get_opcode
	cmp rax, 0x8B
	jne .fail

	mov di, 0x004F			; hlt
	call get_opcode
	cmp rax, 0xF4
	jne .fail

	mov di, 0x0003			; rdx (not an operator)
	call get_opcode
	cmp rax, UNRECOGNISED_ID_OPCODE
	jne .fail

.pass:
	mov rsi, msg_pass
	call print
	ret
.fail:
	mov rsi, msg_fail
	call print
	ret
.msg db "test_get_opcode...", 0x00
msg_pass:
db 0x0A
times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align