diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index eef37cc..1ed4932 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -69,19 +69,24 @@ start: ; ; description: ; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat -; binary located at OUTPUT_ADDR. It's probably desirable to clear the output -; arena before calling this function. +; binary located at OUTPUT_ADDR. +; Behaviour is undefined when: +; - tokens are in an impossible order +; 0x1000 ; memory address, following byte should be a register +; 0x1000 ; not a register +; - operator tokens followed by the wrong number of arguments +; 0x004F ; hlt, expects 0 arguments +; 0x0000 ; rax, an argument +; - an undefined token is included, like 0x0051 ; ; parameters: ; rdi = number of tokens in the token table ; ------------------------------------------------------------------------------ assemble: - xor rax, rax ; number of tokens processed + xor eax, eax ; rax = number of tokens processed + ; rdi = number of tokens in table .loop: - cmp rax, rdi ; check incrementer against the number of tokens in the token - jge .break ; table. If overflown, break - push rdi xor edi, edi mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; next tte @@ -90,16 +95,18 @@ assemble: ; di = next tte call get_tte_type ; al = type of token + + ; on stack: + ; rax = number of tokens processed + ; rdi = number of tokens in table cmp al, 0x01 ; check if next tte's type is an operator je .operator ; if so, handle case of operator jne .continue_operator ; if not, jump past the case + ; di = tte of operator .operator: ; if next tte's type is an operator: - push rax ; MUST be popped BEFORE returning to .continue_operator; it - ; contains the type of token, which still needs to be used. 
- push rdi - ; di = tte + ; di = tte of operator call get_tte_typed_metadata ; al = tte typed metadata pop rdi @@ -108,19 +115,219 @@ assemble: cmp al, 0 ; check if operator has no operands je .operator_0 ; if so, handle case of no operands - jne .operator_with_args ; if not, jump to case of multiple operands + cmp al, 1 ; check if operator has one operand + je .operator_1 ; if so, handle case of one operand + + cmp al, 2 ; check if operator has two operands + je .operator_2 ; if so, handle case of two operands + + jmp .operator_with_args ; if not, handle case of some operands + + ; di = tte of operator .operator_0: + push rsi + mov rsi, .msg_operator_0 + call print.debug + pop rsi + push rdi - ; di = next tte + ; di = tte of operator call get_opcode ; al = opcode call .output_byte pop rdi - pop rax ; from start of label .operator - jmp .continue_operator + jmp .continue + ; di = tte of operator + .operator_1: + push rsi + mov rsi, .msg_operator_1 + call print.debug + pop rsi + + push rdi + ; di = tte of operator + call get_opcode + ; al = opcode + call .output_byte + pop rdi ; di = tte of operator + + pop rax ; rax = number of tokens processed + pop rdi ; rdi = total number of tokens + inc rax + cmp rax, rdi + jge .break + push rdi + push rax + xor edi, edi + mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + + push rdi + and di, 0xFF00 + cmp di, 0x1000 ; check if token is a memory address + pop rdi ; di = next tte + je .operator_1_memory_access + + ; di = next tte + call get_tte_type + ; al = type of token + + cmp al, 0x02 ; type: register + je .operator_1_register + + pop rax ; rax = number of tokens processed + pop rdi ; rdi = total number of tokens + inc rax + cmp rax, rdi + jge .break + jmp .loop + + ; TODO figure out if this is relevant + .operator_1_memory_access: + push rsi + mov rsi, .msg_operator_1_memory_access + call print.error + pop rsi + jmp halt + + .operator_1_register: + push rsi + mov rsi, .msg_operator_1_register + call 
print.debug + pop rsi + + mov si, di ; si = `R/M` tte + mov di, 0x0000 ; di = `reg` tte + mov dl, 11b ; dl = mod bits + call get_ModRM + ; al = Mod R/M byte + call .output_byte + + pop rax ; rax = number of tokens processed + pop rdi ; rdi = total number of tokens + inc rax + cmp rax, rdi + jge .break + jmp .loop + + .operator_2: + push rsi + mov rsi, .msg_operator_2 + call print.debug + pop rsi + + push rdi + ; di = tte of operator + call get_opcode + ; al = opcode + call .output_byte + pop rdi ; di = tte of operator + + pop rax + pop rdi + inc rax + cmp rax, rdi + jge .break + push rdi + push rax + xor edi, edi + mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + + push rdi + and di, 0xFF00 + cmp di, 0x1000 ; check if token is a memory address + pop rdi ; di = next tte + je .operator_2_memory_access + + ; di = next tte + call get_tte_type + ; al = type of token + + cmp al, 0x02 ; type: register + je .operator_2_register + + pop rax ; rax = number of tokens processed + pop rdi ; rdi = total number of tokens + inc rax + cmp rax, rdi + jge .break + jmp .loop + + .operator_2_memory_access: + push rsi + mov rsi, .msg_operator_2_memory_access + call print.error + pop rsi + jmp halt + + .operator_2_register: + push rsi + mov rsi, .msg_operator_2_register + call print.debug + pop rsi + + mov si, di ; si = `R/M` tte + + pop rax + pop rdi + inc rax + cmp rax, rdi + jge .break + push rdi + push rax + xor edi, edi + mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + + push rdi + and di, 0xFF00 + cmp di, 0x1000 ; check if token is a memory address + pop rdi ; di = next tte + je .operator_2_register_memory_access + + ; di = next tte + call get_tte_type + ; al = type of token + + cmp al, 0x02 + je .operator_2_register_register + + pop rax ; rax = number of tokens processed + pop rdi ; rdi = total number of tokens + inc rax + cmp rax, rdi + jge .break + jmp .loop + + .operator_2_register_memory_access: + push rsi + mov rsi, 
.msg_operator_2_register_memory_access + call print.error + pop rsi + jmp halt + + .operator_2_register_register: + push rsi + mov rsi, .msg_operator_2_register_register + call print.debug + pop rsi + + ; si = `R/M` tte + ; di = `reg` tte + mov dl, 11b ; dl = mod bits + call get_ModRM + ; al = Mod R/M byte + call .output_byte + + pop rax ; rax = number of tokens processed + pop rdi ; rdi = total number of tokens + inc rax + cmp rax, rdi + jge .break + jmp .loop + + ; di = tte of operator .operator_with_args: mov [.pending_operator_num_args], al ; save # args fttb @@ -131,13 +338,14 @@ assemble: mov [.pending_operator_opcode], al ; save opcode fttb pop rdi - pop rax ; from start of label .operator + jmp .continue .continue_operator: cmp al, 0x02 ; check if next tte's type is a register je .register ; if so, handle case of register jne .continue_register ; if not, jump past the case + ; di MUST be a valid register tte .register: ; if next tte's type is a register: call .dec_num_args ; because we've found an argument, we need 1 fewer noch @@ -195,10 +403,14 @@ assemble: jmp .continue_register .continue_register: + + .continue: pop rax ; rax = number of tokens processed pop rdi ; rdi = total number of tokens - inc rax ; move to next token + inc rax + cmp rax, rdi + jge .break ; at end of table, break jmp .loop .break: @@ -227,6 +439,8 @@ assemble: pop rax ret + ; resets sketchy memory-based state + ; TODO put this state in an accumulator or something .reset_state: ; I don't actually know if these `word` directives are needed ; TODO check that. 
I think they are, becasue Nasm doesn't record the size @@ -244,6 +458,15 @@ assemble: .first_argument dw UNRECOGNISED_TOKEN_ID ; first argument if there are two .next_output_byte dd OUTPUT_ADDR ; next empty byte in output + .msg_operator_0 db "operator_0", 0x0A, 0x00 + .msg_operator_1 db "operator_1", 0x0A, 0x00 + .msg_operator_1_memory_access db "operator_1_memory_access", 0x0A, 0x00 + .msg_operator_1_register db "operator_1_register", 0x0A, 0x00 + .msg_operator_2 db "operator_2", 0x0A, 0x00 + .msg_operator_2_memory_access db "operator_2_memory_access", 0x0A, 0x00 + .msg_operator_2_register db "operator_2_register", 0x0A, 0x00 + .msg_operator_2_register_memory_access db "operator_2_register_memory_access", 0x0A, 0x00 + .msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00 ; ------------------------------------------------------------------------------ ; get_tte_type @@ -664,11 +887,11 @@ tokenise: mov cx, ax ; cx = length counter for safe keeping call evaluate_operand ; dl = return code - ; ax = register's token ID + ; rax = binary data pop rsi pop rdi ; rdi = first byte of operand add di, cx ; rdi = last byte of operand - mov rcx, rax ; rcx = evaluate_operand's return value + mov rcx, rax ; rcx = evaluate_operand's binary return data pop rax ; rax = number of tokens processed ; operand is some reg @@ -1563,8 +1786,6 @@ opcodes: msg_welcome db "Welcome to Twasm", 0x0A, 0x00 msg_halt db "halted.", 0x0A, 0x00 -token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00 - whitespace_2 db " ", 0x0D ; test program