diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index 12f54fd..96d15bc 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -53,7 +53,7 @@ start: ; ------------------------------------------------------------------------------ ; assemble -; TODO write testsr +; TODO write tests ; TODO make it work :/ putting the cart before the horse ; ; description: @@ -73,50 +73,111 @@ assemble: push rdi xor edi, edi - mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; rdi = next tte + mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; next tte push rax - xor eax, eax + + ; di = next tte call get_tte_type + ; al = type of token + cmp al, 0x01 ; check if next tte's type is an operator + je .operator ; if so, handle case of operator + jne .continue_operator ; if not, jump past the case - cmp ax, 0x01 ; check if it's an operator - je .operator - jne .continue_operator - - .operator: - push rsi - mov rsi, .msg_found_operator - call print - pop rsi + .operator: ; if next tte's type is an operator: + push rax ; MUST be popped BEFORE returning to .continue_operator; it + ; contains the type of token, which still needs to be used. 
push rdi - ; di = tte call get_tte_typed_metadata ; al = tte typed metadata - pop rdi and al, 11b ; mask for # operands - cmp al, 0 ; 0 operands - je .operator_0 - jne .continue_operator0 + cmp al, 0 ; check if operator has no operands + je .operator_0 ; if so, handle case of no operands + jne .operator_with_args ; if not, jump to case of multiple operands .operator_0: + push rdi + ; di = next tte + call get_opcode + ; al = opcode + call .output_byte + pop rdi + + pop rax ; from start of label .operator jmp .continue_operator - .continue_operator0: + .operator_with_args: + mov [.pending_operator_num_args], al ; save # args for the time being + + push rdi + ; di = next tte + call get_opcode + ; al = opcode + mov [.pending_operator_opcode], al ; save it for the time being + pop rdi + + pop rax ; from start of label .operator .continue_operator: - cmp ax, 0x02 ; check if it's a register - je .register - jne .continue_register + cmp al, 0x02 ; check if next tte's type is a register + je .register ; if so, handle case of register + jne .continue_register ; if not, jump past the case - .register: - push rsi - mov rsi, .msg_found_register - call print - pop rsi + .register: ; if next tte's type is a register: + call .dec_num_args ; because we've found an argument, we need 1 fewer now + + cmp byte [.pending_operator_num_args], 1 ; check if this is 1st of 2 args + je .register_one_of_two ; if so, jump to handler + + cmp byte [.pending_operator_num_args], 0 ; check if this is the last arg + je .register_last ; if so, jump to handler + ; note: not necessarily the last + ; of 2 args, it could also be the + ; last of 1 + + ; otherwise, quietly discard the token, reset things, and keep going :/ + call .reset_state + jmp .continue_register + + .register_one_of_two: ; if it's the first of 2 arguments: + mov [.first_argument], di ; di = tte + jmp .continue_register + + .register_last: ; if it's the last argument: + ; swap so the first argument sits in .first_argument + push rax + mov ax, di + mov di, 
[.first_argument] + mov [.first_argument], ax + pop rax + + cmp di, UNRECOGNISED_TOKEN_ID ; check if the second argument is defined + jne .operator_finalise_2 ; if so, there are 2 arguments + ; if not, there is just 1 + + .operator_finalise_1: + mov di, 0x0000 ; id of rax. reg bits 000b + + .operator_finalise_2: + ; TODO avoid swapping earlier and now :/ + mov cx, di + mov di, [.first_argument] + mov si, cx + call get_direct_addressing_ModRM + ; al = ModR/M byte + push rax + mov al, [.pending_operator_opcode] + call .output_byte ; output operator's opcode + pop rax + + call .output_byte ; output ModR/M byte + + call .reset_state ; reset all the state parts of this function + jmp .continue_register .continue_register: pop rax ; incrementer @@ -127,10 +188,43 @@ assemble: .break: ret - .msg_found_operator db "found operator", 0x0A, 0x00 - .msg_found_register db "found register", 0x0A, 0x00 - .next_register dw UNRECOGNISED_TOKEN_ID - .next_next_register dw UNRECOGNISED_TOKEN_ID + + ; procedures + + ; al = byte to write + .output_byte: + mov edx, [.next_output_byte] ; get output byte's address + mov [edx], al ; write byte to that address + inc edx ; increment address + mov [.next_output_byte], edx ; put output byte's address + ret + + ; runs dec on .pending_operator_num_args + .dec_num_args + push rax + mov al, [.pending_operator_num_args] + dec al + mov [.pending_operator_num_args], al + pop rax + ret + + .reset_state + ; I don't actually know if these `word` and `byte` directives are needed + ; TODO check that. I think they are, because Nasm doesn't record the size + ; of labels? 
+ mov word [.pending_operator_opcode], UNRECOGNISED_TOKEN_ID + mov byte [.pending_operator_num_args], 0x00 + mov word [.first_argument], UNRECOGNISED_TOKEN_ID + ret + + ; state variables + + .pending_operator_opcode db 0x00 ; the operator seeking args + .pending_operator_num_args db 0x00 ; # of args it takes + + .first_argument dw UNRECOGNISED_TOKEN_ID ; first argument if there are two + + .next_output_byte dd OUTPUT_ADDR ; next empty byte in output ; ------------------------------------------------------------------------------ ; get_tte_type @@ -226,44 +320,24 @@ get_tte_typed_metadata: ; ------------------------------------------------------------------------------ get_direct_addressing_ModRM: - ; TODO something is backwards in this function but I don't see it. If the test - ; suite fails, it's too far gone; rewrite it. - push rdi - push rsi - ; get metadata of reg - call get_tte_typed_metadata - ; al = typed metadata of reg - pop rsi - pop rdi + ; di = tte + call get_reg_bits + ; al = reg bits + mov bl, al + shl bl, 3 - mov bl, al ; bl = metadata of reg + mov rdi, rsi ; do the other one - push rdi - push rsi - push rbx - - ; get metadata of R/M - mov di, si - call get_tte_typed_metadata - ; al = typed metadata of R/M - - pop rbx - pop rsi - pop rdi + ; di = tte + call get_reg_bits + ; al = reg bits mov dl, al - shr dl, 2 - and dl, 111b ; mask - - shr bl, 2 - and bl, 111b ; mask - shl bl, 3 - xor eax, eax or al, 11b << 6 ; mod bits - or al, dl ; reg bits - or al, bl ; R/M bits + or al, bl ; reg bits + or al, dl ; R/M bits and rax, 0xFF ; mask for byte ret @@ -304,6 +378,28 @@ get_opcode: and rax, 0xFF ; mask ret +; ------------------------------------------------------------------------------ +; get_reg_bits +; +; description: +; given a register token, returns its reg bits metadata +; +; parameters: +; di = token table entry +; +; returned: +; al = reg bits; the rest of rax, including the upper 5 bits of al, are +; zeroed. 
+; ------------------------------------------------------------------------------ + +get_reg_bits: + ; di = tte + call get_tte_typed_metadata + ; al = typed metadata + shr al, 2 ; discard type data + and al, 111b ; mask + ret + ; ------------------------------------------------------------------------------ ; tokenising ; ------------------------------------------------------------------------------ @@ -1024,7 +1120,7 @@ tokens: opcodes: .by_id: dw 0x0053 ; xor - db 0x33 + db 0x31 db 0x00 ; reserved dw 0x0054 ; inc @@ -1032,7 +1128,7 @@ opcodes: db 0x00 ; reserved dw 0x0056 ; mov - db 0x8B + db 0x89 db 0x00 ; reserved dw 0x004F ; hlt diff --git a/twasm/asm/tests.asm b/twasm/asm/tests.asm index 22d626a..4705829 100644 --- a/twasm/asm/tests.asm +++ b/twasm/asm/tests.asm @@ -40,6 +40,9 @@ run_tests: call clear_test_arena call test_get_opcode + call clear_test_arena + call test_get_reg_bits + ret .msg db "running test suite...", 0x0A, 0x00 @@ -529,7 +532,7 @@ test_get_opcode: mov di, 0x0053 ; xor call get_opcode - cmp al, 0x33 + cmp al, 0x31 jne .fail mov di, 0x0054 ; inc @@ -557,6 +560,42 @@ test_get_opcode: ret .msg db "test_get_opcode...", 0x00 +; ------------------------------------------------------------------------------ +; test_get_reg_bits +; +; description: +; tests get_reg_bits described functionality +; ------------------------------------------------------------------------------ + +test_get_reg_bits: + mov rsi, .msg + call print + + mov di, 0x0000 ; rax + call get_reg_bits + cmp al, 000b + jne .fail + + mov di, 0x0010 ; eax + call get_reg_bits + cmp al, 000b + jne .fail + + mov di, 0x0003 ; rdx + call get_reg_bits + cmp al, 010b + jne .fail + + .pass: + mov rsi, msg_pass + call print + ret + .fail: + mov rsi, msg_fail + call print + ret + .msg db "test_get_reg_bits...", 0x00 + msg_pass: db 0x0A times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align