add get_reg_bits and test, streamline ModRM calculations, correct opcodes, fix typos...

This commit is contained in:
andromeda
2026-03-10 21:21:59 +01:00
parent 08c39a2bd8
commit e6d7cb2f21
2 changed files with 199 additions and 64 deletions

View File

@@ -53,7 +53,7 @@ start:
; ------------------------------------------------------------------------------
; assemble
; TODO write testsr
; TODO write tests
; TODO make it work :/ putting the cart before the horse
;
; description:
@@ -73,50 +73,111 @@ assemble:
push rdi
xor edi, edi
mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; rdi = next tte
mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; next tte
push rax
xor eax, eax
; di = next tte
call get_tte_type
; al = type of token
cmp al, 0x01 ; check if next tte's type is an operator
je .operator ; if so, handle case of operator
jne .continue_operator ; if not, jump past the case
cmp ax, 0x01 ; check if it's an operator
je .operator
jne .continue_operator
.operator:
push rsi
mov rsi, .msg_found_operator
call print
pop rsi
.operator: ; if next tte's type is an operator:
push rax ; MUST be popped BEFORE returning to .continue_operator; it
; contains the type of token, which still needs to be used.
push rdi
; di = tte
call get_tte_typed_metadata
; al = tte typed metadata
pop rdi
and al, 11b ; mask for # operands
cmp al, 0 ; 0 operands
je .operator_0
jne .continue_operator0
cmp al, 0 ; check if operator has no operands
je .operator_0 ; if so, handle case of no operands
jne .operator_with_args ; if not, jump to case of multiple operands
.operator_0:
push rdi
; di = next tte
call get_opcode
; al = opcode
call .output_byte
pop rdi
pop rax ; from start of label .operator
jmp .continue_operator
.continue_operator0:
.operator_with_args:
mov [.pending_operator_num_args], al ; save # args fttb
push rdi
; di = next tte
call get_opcode
; al = opcode
mov [.pending_operator_opcode], al ; save it fttb
pop rdi
pop rax ; from start of label .operator
.continue_operator:
cmp ax, 0x02 ; check if it's a register
je .register
jne .continue_register
cmp al, 0x02 ; check if next tte's type is a register
je .register ; if so, handle case of register
jne .continue_register ; if not, jump past the case
.register:
push rsi
mov rsi, .msg_found_register
call print
pop rsi
; handles a register token. di = the register's tte. The pending operator's
; remaining-argument count decides whether this register is stored for later
; or completes the instruction.
.register: ; if next tte's type is a register:
call .dec_num_args ; we've found an argument, so 1 fewer is still outstanding
cmp byte [.pending_operator_num_args], 1 ; check if this is 1st of 2 args
je .register_one_of_two ; if so, jump to handler
cmp byte [.pending_operator_num_args], 0 ; check if this is the last arg
je .register_last ; if so, jump to handler
; note: not necessarily the last
; of 2 args, it could also be the
; last of 1
; otherwise (count is neither 1 nor 0, e.g. no operator was pending),
; quietly discard the token, reset things, and keep going :/
call .reset_state
jmp .continue_register
.register_one_of_two: ; if it's the first of 2 arguments:
mov [.first_argument], di ; di = tte; keep it until the last argument arrives
jmp .continue_register
.register_last: ; if it's the last argument:
; exchange di and .first_argument. Afterwards:
;   di              = the previously stored tte, or UNRECOGNISED_TOKEN_ID if
;                     nothing was stored (single-argument operator)
;   .first_argument = this (last) register's tte
push rax
mov ax, di
mov di, [.first_argument]
mov [.first_argument], ax
pop rax
cmp di, UNRECOGNISED_TOKEN_ID ; check if an earlier argument had been stored
jne .operator_finalise_2 ; if so, there are 2 arguments
; if not, there is just 1
.operator_finalise_1:
mov di, 0x0000 ; id of rax. reg bits 000b; stands in for the missing argument
.operator_finalise_2:
; TODO avoid swapping earlier and now :/
; exchange again so the call receives:
;   di = this (last) register's tte
;   si = the earlier argument's tte (or 0x0000 from .operator_finalise_1)
; NOTE(review): cx and si are overwritten here without being saved - confirm
; that nothing live is held in rcx/rsi at this point.
mov cx, di
mov di, [.first_argument]
mov si, cx
call get_direct_addressing_ModRM
; al = ModR/M byte
push rax ; keep the ModR/M byte while al carries the opcode
mov al, [.pending_operator_opcode]
call .output_byte ; output operator's opcode
pop rax
call .output_byte ; output ModR/M byte
call .reset_state ; reset all the state parts of this function
jmp .continue_register
.continue_register:
pop rax ; incrementer
@@ -127,10 +188,43 @@ assemble:
.break:
ret
.msg_found_operator db "found operator", 0x0A, 0x00
.msg_found_register db "found register", 0x0A, 0x00
.next_register dw UNRECOGNISED_TOKEN_ID
.next_next_register dw UNRECOGNISED_TOKEN_ID
; procedures
; .output_byte
; writes one byte at the address held in .next_output_byte, then advances
; .next_output_byte by one.
;
; in:  al = byte to write
; out: nothing. All general-purpose registers are preserved; flags are
;      modified by the increment.
.output_byte:
  push rdx
  mov edx, [.next_output_byte]  ; 32-bit load zero-extends into rdx
  mov [rdx], al                 ; full-width addressing, no address-size prefix
  inc dword [.next_output_byte] ; cursor now points at the next empty byte
  pop rdx
  ret
; .dec_num_args
; decrements the byte at .pending_operator_num_args in place.
;
; in:  nothing
; out: nothing. No general-purpose register is touched; flags are set by dec.
;
; note: the count is an unsigned byte, so decrementing 0 wraps to 0xFF. The
; caller in .register treats any value other than 0 or 1 as "discard".
.dec_num_args:
  dec byte [.pending_operator_num_args]
  ret
; .reset_state
; puts the operator-parsing state back to the values it is declared with:
; no pending opcode, no outstanding arguments, no stored first argument.
;
; in:  nothing
; out: nothing. No registers or flags are modified.
;
; The size keywords are required: NASM does not record the size of the data
; behind a label, so a store of an immediate to memory must state its width.
; Each width must match the variable's declaration (db -> byte, dw -> word);
; a word store to .pending_operator_opcode would also overwrite the adjacent
; .pending_operator_num_args.
.reset_state:
  mov byte [.pending_operator_opcode], 0x00
  mov byte [.pending_operator_num_args], 0x00
  mov word [.first_argument], UNRECOGNISED_TOKEN_ID
  ret
; state variables
; static storage shared by the handlers and procedures of assemble. Widths
; matter: stores to these must use the same size as the declaration.
.pending_operator_opcode db 0x00 ; opcode of the operator seeking args (1 byte)
.pending_operator_num_args db 0x00 ; # of args it is still waiting for (1 byte)
.first_argument dw UNRECOGNISED_TOKEN_ID ; tte of the first argument if there are two
.next_output_byte dd OUTPUT_ADDR ; address of the next empty byte in output
; ------------------------------------------------------------------------------
; get_tte_type
@@ -226,44 +320,24 @@ get_tte_typed_metadata:
; ------------------------------------------------------------------------------
get_direct_addressing_ModRM:
; TODO something is backwards in this function but I don't see it. If the test
; suite fails, it's too far gone; rewrite it.
push rdi
push rsi
; get metadata of reg
call get_tte_typed_metadata
; al = typed metadata of reg
pop rsi
pop rdi
; di = tte
call get_reg_bits
; al = reg bits
mov bl, al
shl bl, 3
mov bl, al ; bl = metadata of reg
mov rdi, rsi ; do the other one
push rdi
push rsi
push rbx
; get metadata of R/M
mov di, si
call get_tte_typed_metadata
; al = typed metadata of R/M
pop rbx
pop rsi
pop rdi
; di = tte
call get_reg_bits
; al = reg bits
mov dl, al
shr dl, 2
and dl, 111b ; mask
shr bl, 2
and bl, 111b ; mask
shl bl, 3
xor eax, eax
or al, 11b << 6 ; mod bits
or al, dl ; reg bits
or al, bl ; R/M bits
or al, bl ; reg bits
or al, dl ; R/M bits
and rax, 0xFF ; mask for byte
ret
@@ -304,6 +378,28 @@ get_opcode:
and rax, 0xFF ; mask
ret
; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
; given a register token, returns its reg bits: the 3-bit field held in bits
; 2..4 of the token's typed metadata.
;
; parameters:
; di = token table entry
;
; returned:
; al = reg bits (0..7); the rest of rax, including the upper 5 bits of al, are
;      zeroed.
; ------------------------------------------------------------------------------
get_reg_bits:
  ; di = tte
  call get_tte_typed_metadata
  ; al = typed metadata
  shr al, 2      ; discard type data
  and eax, 111b  ; keep only the reg bits; the 32-bit write also clears the
                 ; upper half of rax, so the documented zeroing holds here
  ret
; ------------------------------------------------------------------------------
; tokenising
; ------------------------------------------------------------------------------
@@ -1024,7 +1120,7 @@ tokens:
opcodes:
.by_id:
dw 0x0053 ; xor
db 0x33
db 0x31
db 0x00 ; reserved
dw 0x0054 ; inc
@@ -1032,7 +1128,7 @@ opcodes:
db 0x00 ; reserved
dw 0x0056 ; mov
db 0x8B
db 0x89
db 0x00 ; reserved
dw 0x004F ; hlt

View File

@@ -40,6 +40,9 @@ run_tests:
call clear_test_arena
call test_get_opcode
call clear_test_arena
call test_get_reg_bits
ret
.msg db "running test suite...", 0x0A, 0x00
@@ -529,7 +532,7 @@ test_get_opcode:
mov di, 0x0053 ; xor
call get_opcode
cmp al, 0x33
cmp al, 0x31
jne .fail
mov di, 0x0054 ; inc
@@ -557,6 +560,42 @@ test_get_opcode:
ret
.msg db "test_get_opcode...", 0x00
; ------------------------------------------------------------------------------
; test_get_reg_bits
;
; description:
; checks get_reg_bits against a few register tokens with known reg bits.
; Prints the test's name, then msg_pass if every check matched or msg_fail at
; the first mismatch.
; ------------------------------------------------------------------------------
test_get_reg_bits:
  mov rsi, .msg
  call print

  ; case 1: rax -> 000b
  mov di, 0x0000
  call get_reg_bits
  cmp al, 000b
  jne .fail

  ; case 2: eax -> 000b
  mov di, 0x0010
  call get_reg_bits
  cmp al, 000b
  jne .fail

  ; case 3: rdx -> 010b
  mov di, 0x0003
  call get_reg_bits
  cmp al, 010b
  jne .fail

.pass:                ; reached by falling through every case
  mov rsi, msg_pass
  jmp .report
.fail:
  mov rsi, msg_fail
.report:              ; rsi = result message chosen above
  call print
  ret
.msg db "test_get_reg_bits...", 0x00
msg_pass:
db 0x0A
times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align