add get_reg_bits and test, streamline ModRM calculations, correct opcodes, fix typos...

This commit is contained in:
andromeda
2026-03-10 21:21:59 +01:00
parent 08c39a2bd8
commit e6d7cb2f21
2 changed files with 199 additions and 64 deletions

View File

@@ -53,7 +53,7 @@ start:
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; assemble ; assemble
; TODO write testsr ; TODO write tests
; TODO make it work :/ putting the cart before the horse ; TODO make it work :/ putting the cart before the horse
; ;
; description: ; description:
@@ -73,50 +73,111 @@ assemble:
push rdi push rdi
xor edi, edi xor edi, edi
mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; rdi = next tte mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; next tte
push rax push rax
xor eax, eax
; di = next tte
call get_tte_type call get_tte_type
; al = type of token
cmp al, 0x01 ; check if next tte's type is an operator
je .operator ; if so, handle case of operator
jne .continue_operator ; if not, jump past the case
cmp ax, 0x01 ; check if it's an operator .operator: ; if next tte's type is an operator:
je .operator push rax ; MUST be popped BEFORE returning to .continue_operator; it
jne .continue_operator ; contains the type of token, which still needs to be used.
.operator:
push rsi
mov rsi, .msg_found_operator
call print
pop rsi
push rdi push rdi
; di = tte ; di = tte
call get_tte_typed_metadata call get_tte_typed_metadata
; al = tte typed metadata ; al = tte typed metadata
pop rdi pop rdi
and al, 11b ; mask for # operands and al, 11b ; mask for # operands
cmp al, 0 ; 0 operands cmp al, 0 ; check if operator has no operands
je .operator_0 je .operator_0 ; if so, handle case of no operands
jne .continue_operator0 jne .operator_with_args ; if not, jump to case of multiple operands
.operator_0: .operator_0:
push rdi
; di = next tte
call get_opcode
; al = opcode
call .output_byte
pop rdi
pop rax ; from start of label .operator
jmp .continue_operator jmp .continue_operator
.continue_operator0: .operator_with_args:
mov [.pending_operator_num_args], al ; save # args fttb
push rdi
; di = next tte
call get_opcode
; al = opcode
mov [.pending_operator_opcode], al ; save it fttb
pop rdi
pop rax ; from start of label .operator
.continue_operator: .continue_operator:
cmp ax, 0x02 ; check if it's a register cmp al, 0x02 ; check if next tte's type is a register
je .register je .register ; if so, handle case of register
jne .continue_register jne .continue_register ; if not, jump past the case
.register: .register: ; if next tte's type is a register:
push rsi call .dec_num_args ; because we've found an argument, we need 1 fewer noch
mov rsi, .msg_found_register
call print cmp byte [.pending_operator_num_args], 1 ; check if this is 1st of 2 args
pop rsi je .register_one_of_two ; if so, jump to handler
cmp byte [.pending_operator_num_args], 0 ; check if this is the last arg
je .register_last ; if so, jump to handler
; note: not necessarily the last
; of 2 args, it could also be the
; last of 1
; otherwise, quietly discard the token, reset things, and keep going :/
call .reset_state
jmp .continue_register
.register_one_of_two: ; if it's the first of 2 arguments:
mov [.first_argument], di ; ax = tte
jmp .continue_register
.register_last: ; if it's the last argument:
; swap so the first argument sits in .first_argument
push rax
mov ax, di
mov di, [.first_argument]
mov [.first_argument], ax
pop rax
cmp di, UNRECOGNISED_TOKEN_ID ; check if the second argument is defined
jne .operator_finalise_2 ; if so, there are 2 arguments
; if not, there is just 1
.operator_finalise_1:
mov di, 0x0000 ; id of rax. reg bits 000b
.operator_finalise_2:
; TODO avoid swapping earlier and now :/
mov cx, di
mov di, [.first_argument]
mov si, cx
call get_direct_addressing_ModRM
; al = ModR/M byte
push rax
mov al, [.pending_operator_opcode]
call .output_byte ; output operator's opcode
pop rax
call .output_byte ; output ModR/M byte
call .reset_state ; reset all the state parts of this function
jmp .continue_register
.continue_register: .continue_register:
pop rax ; incrementer pop rax ; incrementer
@@ -127,10 +188,43 @@ assemble:
.break: .break:
ret ret
.msg_found_operator db "found operator", 0x0A, 0x00
.msg_found_register db "found register", 0x0A, 0x00 ; procedures
.next_register dw UNRECOGNISED_TOKEN_ID
.next_next_register dw UNRECOGNISED_TOKEN_ID ; al = byte to write
.output_byte:
    ; Appends one byte to the output buffer and advances the write cursor.
    ;   in:       al = byte to append
    ;   clobbers: rdx (left holding the advanced cursor), flags
    mov edx, [.next_output_byte]    ; a 32-bit load zero-extends into rdx,
                                    ; so rdx is the full cursor address
    mov [rdx], al                   ; store the byte at the cursor
    inc edx                         ; cursor now points at the next free byte
    mov [.next_output_byte], edx    ; remember it for the next call
    ret
; decrements .pending_operator_num_args by one, in place
;   clobbers: flags (as set by `dec`); every register is preserved
;   note:     the counter wraps from 0x00 to 0xFF when nothing is pending;
;             .register treats any value other than 0 or 1 as "discard token"
.dec_num_args:
    dec byte [.pending_operator_num_args]
    ret
.reset_state:
    ; Puts the pending-operator state back to "nothing pending".
    ;   clobbers: nothing (stores of immediates leave registers and flags alone)
    ;
    ; The size keywords are required: NASM does not record the size of the data
    ; behind a label, so a store of an immediate to memory must state its width.
    ; Each width has to match the variable's declaration (db / db / dw). The
    ; opcode slot is a single byte, so a word store there would spill into
    ; .pending_operator_num_args, which is declared directly after it.
    mov byte [.pending_operator_opcode], 0x00           ; same as its initial value
    mov byte [.pending_operator_num_args], 0x00
    mov word [.first_argument], UNRECOGNISED_TOKEN_ID
    ret
; state variables
; Scratch state shared by the local procedures and handlers above.
; .reset_state rewrites the first three; .next_output_byte is only ever
; advanced (by .output_byte).

; opcode saved by .operator_with_args and emitted by .operator_finalise_2
.pending_operator_opcode db 0x00 ; the operator seeking args
; operand count saved by .operator_with_args, counted down by .dec_num_args
.pending_operator_num_args db 0x00 ; # of args it takes
; token table entry stored by .register_one_of_two
.first_argument dw UNRECOGNISED_TOKEN_ID ; first argument if there are two
; address .output_byte writes to next; it moves forward one byte per write
.next_output_byte dd OUTPUT_ADDR ; next empty byte in output
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; get_tte_type ; get_tte_type
@@ -226,44 +320,24 @@ get_tte_typed_metadata:
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
get_direct_addressing_ModRM: get_direct_addressing_ModRM:
; TODO something is backwards in this function but I don't see it. If the test ; di = tte
; suite fails, it's too far gone; rewrite it. call get_reg_bits
push rdi ; al = reg bits
push rsi mov bl, al
; get metadata of reg shl bl, 3
call get_tte_typed_metadata
; al = typed metadata of reg
pop rsi
pop rdi
mov bl, al ; bl = metadata of reg mov rdi, rsi ; do the other one
push rdi ; di = tte
push rsi call get_reg_bits
push rbx ; al = reg bits
; get metadata of R/M
mov di, si
call get_tte_typed_metadata
; al = typed metadata of R/M
pop rbx
pop rsi
pop rdi
mov dl, al mov dl, al
shr dl, 2
and dl, 111b ; mask
shr bl, 2
and bl, 111b ; mask
shl bl, 3
xor eax, eax xor eax, eax
or al, 11b << 6 ; mod bits or al, 11b << 6 ; mod bits
or al, dl ; reg bits or al, bl ; reg bits
or al, bl ; R/M bits or al, dl ; R/M bits
and rax, 0xFF ; mask for byte and rax, 0xFF ; mask for byte
ret ret
@@ -304,6 +378,28 @@ get_opcode:
and rax, 0xFF ; mask and rax, 0xFF ; mask
ret ret
; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
;   given a register token, returns its reg bits metadata: the 3-bit field held
;   in bits 2-4 of the token's typed metadata
;
; parameters:
;   di = token table entry
;
; returned:
;   al = reg bits (000b-111b); the rest of rax, including the upper 5 bits of
;        al, are zeroed.
; ------------------------------------------------------------------------------

get_reg_bits:
    ; di = tte
    call get_tte_typed_metadata
    ; al = typed metadata
    shr al, 2                   ; discard type data
    and eax, 111b               ; keep the 3 reg bits; writing eax also zeroes
                                ; the rest of rax, so the documented return
                                ; holds whatever the callee left up there
    ret
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; tokenising ; tokenising
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
@@ -1024,7 +1120,7 @@ tokens:
opcodes: opcodes:
.by_id: .by_id:
dw 0x0053 ; xor dw 0x0053 ; xor
db 0x33 db 0x31
db 0x00 ; reserved db 0x00 ; reserved
dw 0x0054 ; inc dw 0x0054 ; inc
@@ -1032,7 +1128,7 @@ opcodes:
db 0x00 ; reserved db 0x00 ; reserved
dw 0x0056 ; mov dw 0x0056 ; mov
db 0x8B db 0x89
db 0x00 ; reserved db 0x00 ; reserved
dw 0x004F ; hlt dw 0x004F ; hlt

View File

@@ -40,6 +40,9 @@ run_tests:
call clear_test_arena call clear_test_arena
call test_get_opcode call test_get_opcode
call clear_test_arena
call test_get_reg_bits
ret ret
.msg db "running test suite...", 0x0A, 0x00 .msg db "running test suite...", 0x0A, 0x00
@@ -529,7 +532,7 @@ test_get_opcode:
mov di, 0x0053 ; xor mov di, 0x0053 ; xor
call get_opcode call get_opcode
cmp al, 0x33 cmp al, 0x31
jne .fail jne .fail
mov di, 0x0054 ; inc mov di, 0x0054 ; inc
@@ -557,6 +560,42 @@ test_get_opcode:
ret ret
.msg db "test_get_opcode...", 0x00 .msg db "test_get_opcode...", 0x00
; ------------------------------------------------------------------------------
; test_get_reg_bits
;
; description:
;   checks that get_reg_bits returns the expected reg bits for a handful of
;   register tokens, then prints the shared pass or fail message
; ------------------------------------------------------------------------------

test_get_reg_bits:
    mov rsi, .msg
    call print                  ; announce which test is running

    ; each case: load a register token id, ask for its reg bits, compare
    mov di, 0x0000              ; rax
    call get_reg_bits
    cmp al, 000b
    jne .fail

    mov di, 0x0010              ; eax
    call get_reg_bits
    cmp al, 000b
    jne .fail

    mov di, 0x0003              ; rdx
    call get_reg_bits
    cmp al, 010b
    jne .fail

    ; every case matched: fall through to the pass message
    .pass:
        mov rsi, msg_pass
        jmp .report
    .fail:
        mov rsi, msg_fail
    .report:
        call print              ; rsi = whichever message was selected
        ret

    .msg db "test_get_reg_bits...", 0x00
msg_pass: msg_pass:
db 0x0A db 0x0A
times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align